author     dim <dim@FreeBSD.org>    2015-12-30 13:13:10 +0000
committer  dim <dim@FreeBSD.org>    2015-12-30 13:13:10 +0000
commit     9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a (patch)
tree       b466a4817f79516eb1df8eae92bccf62ecc84003 /contrib/llvm/lib/Target
parent     f09a28d1de99fda4f5517fb12670fc36552f4927 (diff)
parent     e194cd6d03d91631334d9d5e55b506036f423cc8 (diff)
download   FreeBSD-src-9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a.zip
           FreeBSD-src-9b5bf5c4f53d65d6a48722d7410ed7cb15f5ba3a.tar.gz
Update llvm to trunk r256633.
Diffstat (limited to 'contrib/llvm/lib/Target')
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.td43
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp15
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp1
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp17
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp27
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h38
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td19
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp32
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp22
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp20
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp16
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp15
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp221
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp93
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h6
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp246
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp1446
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h110
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td1076
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp211
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h12
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td483
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp1304
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h42
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h17
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp56
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp31
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h27
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp150
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h39
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp201
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp46
-rw-r--r--contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h10
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h26
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp5
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp6
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h8
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp88
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp5
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp33
-rw-r--r--contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h41
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.h16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.td10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp126
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp84
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp200
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h48
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp479
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp195
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h15
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp373
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp77
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp87
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h51
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp102
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp43
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp266
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CIInstructions.td340
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp16
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp27
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h40
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp51
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp15
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Processors.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp21
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Instructions.td2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp229
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp133
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp204
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp243
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h34
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp660
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp88
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td44
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1332
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h154
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td837
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td485
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp36
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp1
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp109
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h232
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp193
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp271
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h64
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td117
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SISchedule.td18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp61
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp10
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp97
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VIInstructions.td61
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td708
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp119
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h9
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp174
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h23
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.h22
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td2
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp64
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp112
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp202
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp275
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp2101
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h49
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp77
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td73
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td440
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td163
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td95
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td55
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp504
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td37
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td1046
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp56
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp175
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp50
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp170
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h33
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp204
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp113
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp3
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp389
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h7
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h8
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp54
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h59
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp99
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp32
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp12
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp291
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h36
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp47
-rw-r--r--contrib/llvm/lib/Target/AVR/AVR.td563
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRCallingConv.td65
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRConfig.h15
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h73
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td216
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp4
-rw-r--r--contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/BPF/BPF.td7
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp3
-rw-r--r--contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp25
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp91
-rw-r--r--contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp2152
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.h16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp1010
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.h7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td82
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp435
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp2778
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp33
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp43
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp1063
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp435
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h11
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp319
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp16
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp160
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp776
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h48
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td462
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td1019
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td46
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td238
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp3980
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h390
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td65
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td130
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td10
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td2241
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td43
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td24
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td836
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp60
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp53
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOperands.td368
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp150
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp94
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td81
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp91
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td170
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td310
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp50
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp1209
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp616
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp92
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp97
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h15
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp38
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h70
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp1975
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h114
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp52
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h65
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp268
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h119
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h13
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp581
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h218
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp24
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp29
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp126
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp12
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp49
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h35
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp228
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h58
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp52
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp90
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h52
-rw-r--r--contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h13
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp17
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp8
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp2226
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp318
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp13
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h7
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp62
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h1
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp17
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h3
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h13
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp73
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h25
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp82
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td579
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td887
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td86
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td119
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td244
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td528
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td28
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td81
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td174
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td17
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp202
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td120
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td298
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td91
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td12
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp13
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCCState.cpp16
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsCallingConv.td25
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp31
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td38
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td378
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp34
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td84
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td192
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFastISel.cpp59
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp125
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h37
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td42
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td510
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td210
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp69
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.h51
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp59
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp241
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h10
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp22
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp85
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td68
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td392
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp5
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSubtarget.h21
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp24
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h19
-rw-r--r--contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h1
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h18
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp132
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h15
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp10
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp18
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp59
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp183
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h17
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp66
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h1
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp353
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp102
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSection.h9
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp60
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h5
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp105
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h21
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXVector.td58
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp24
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h25
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp267
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp253
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp26
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp35
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp87
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp242
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp140
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp502
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h22
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp131
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h53
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td20
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td612
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp9
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp131
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp230
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp114
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h15
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h11
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp52
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp141
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h32
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp53
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp89
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp248
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp2
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp52
-rw-r--r--contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h1
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcCallingConv.td9
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp154
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h8
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp183
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp295
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.h18
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td9
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp39
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td146
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp64
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h4
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td54
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp6
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/README.txt6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp15
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp86
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp159
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h20
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td55
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp77
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td34
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp128
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h11
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp57
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp20
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp46
-rw-r--r--contrib/llvm/lib/Target/TargetRecip.cpp12
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp94
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h18
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp103
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp54
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp100
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp41
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h16
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/README.txt68
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Relooper.cpp984
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Relooper.h186
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.h14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.td12
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp110
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp285
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp468
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp81
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp117
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h3
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def25
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp57
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp602
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h50
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td67
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td82
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td125
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td117
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td84
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp133
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h20
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td115
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td105
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td529
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp133
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp106
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h45
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h68
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp76
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp1066
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp86
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp175
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp109
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp265
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp60
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td32
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp124
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp1
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h8
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp96
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp24
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h4
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt311
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp274
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h10
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp319
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h19
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp7
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp47
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp1
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp645
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp83
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h48
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp11
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h69
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp11
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp184
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h13
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp99
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp199
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h27
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h59
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td655
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp25
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp109
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.h59
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td79
-rw-r--r--contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp21
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp168
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp89
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp76
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp1162
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h65
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp505
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp6416
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h124
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td3696
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td69
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td216
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td13
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td252
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td136
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td336
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp1514
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h142
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td257
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td24
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td761
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td140
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td91
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td115
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h980
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp216
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h8
-rw-r--r--contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp326
-rw-r--r--contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp5
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp318
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h25
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td41
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp43
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp46
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h57
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp17
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp879
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h64
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp234
-rw-r--r--contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp18
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp38
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp5
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp26
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.h14
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp9
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp14
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h2
649 files changed, 74113 insertions, 24127 deletions
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index 9a7d6c8..0bff9b5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -32,6 +32,15 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
+def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
+ "Enable ARMv8 PMUv3 Performance Monitors extension">;
+
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Full FP16", [FeatureFPARMv8]>;
+
+def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true",
+ "Enable Statistical Profiling extension">;
+
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -40,6 +49,15 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
+def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
+ "Reserve X18, making it unavailable "
+ "as a GPR">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -47,6 +65,9 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions", [FeatureCRC]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions", [HasV8_1aOps]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -70,19 +91,29 @@ include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors",
+ [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureCRC,
+ FeaturePerfMon]>;
+
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
"Cyclone",
@@ -90,12 +121,16 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureNEON,
FeatureCrypto,
FeatureCRC,
+ FeaturePerfMon,
FeatureZCRegMove, FeatureZCZeroing]>;
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
+// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
+def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
@@ -109,11 +144,13 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def GenericAsmParserVariant : AsmParserVariant {
int Variant = 0;
string Name = "generic";
+ string BreakCharacters = ".";
}
def AppleAsmParserVariant : AsmParserVariant {
int Variant = 1;
string Name = "apple-neon";
+ string BreakCharacters = ".";
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index d7ef3f4..d215d9e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -122,7 +122,7 @@ AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) {
static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
// Get the previous machine basic block in the function.
- MachineFunction::iterator MBBI = *MBB;
+ MachineFunction::iterator MBBI(MBB);
// Can't go off top of function.
if (MBBI == MBB->getParent()->begin())
@@ -131,7 +131,7 @@ static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB,
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 2> Cond;
- MachineBasicBlock *PrevBB = std::prev(MBBI);
+ MachineBasicBlock *PrevBB = &*std::prev(MBBI);
for (MachineBasicBlock *S : MBB->predecessors())
if (S == PrevBB && !TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond) &&
!TBB && !FBB)
@@ -151,10 +151,9 @@ static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB,
// If there is no non-pseudo in the current block, loop back around and try
// the previous block (if there is one).
while ((FMBB = getBBFallenThrough(FMBB, TII))) {
- for (auto I = FMBB->rbegin(), E = FMBB->rend(); I != E; ++I) {
- if (!I->isPseudo())
- return &*I;
- }
+ for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend()))
+ if (!I.isPseudo())
+ return &I;
}
// There was no previous non-pseudo in the fallen through blocks
@@ -217,8 +216,8 @@ AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) {
++Idx;
}
- DEBUG(dbgs() << "Scan complete, "<< Sequences.size()
- << " occurences of pattern found.\n");
+ DEBUG(dbgs() << "Scan complete, " << Sequences.size()
+ << " occurrences of pattern found.\n");
// Then update the basic block, inserting nops between the detected sequences.
for (auto &MI : Sequences) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 9d6dbd6..79a84ad 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -593,7 +593,6 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
if (Change) {
Substs[MO.getReg()] = Reg;
MO.setReg(Reg);
- MRI->setPhysRegUsed(Reg);
Changed = true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 716e1a3..3afcdfb 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -57,6 +57,8 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
" the other."),
cl::init(true));
+#define AARCH64_TYPE_PROMO_NAME "AArch64 Address Type Promotion"
+
//===----------------------------------------------------------------------===//
// AArch64AddressTypePromotion
//===----------------------------------------------------------------------===//
@@ -76,7 +78,7 @@ public:
}
const char *getPassName() const override {
- return "AArch64 Address Type Promotion";
+ return AARCH64_TYPE_PROMO_NAME;
}
/// Iterate over the functions and promote the computation of interesting
@@ -143,10 +145,10 @@ private:
char AArch64AddressTypePromotion::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion",
- "AArch64 Type Promotion Pass", false, false)
+ AARCH64_TYPE_PROMO_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion",
- "AArch64 Type Promotion Pass", false, false)
+ AARCH64_TYPE_PROMO_NAME, false, false)
FunctionPass *llvm::createAArch64AddressTypePromotionPass() {
return new AArch64AddressTypePromotion();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 18d21fd..1644d71 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -61,6 +61,12 @@ STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
+namespace llvm {
+void initializeAArch64AdvSIMDScalarPass(PassRegistry &);
+}
+
+#define AARCH64_ADVSIMD_NAME "AdvSIMD Scalar Operation Optimization"
+
namespace {
class AArch64AdvSIMDScalar : public MachineFunctionPass {
MachineRegisterInfo *MRI;
@@ -82,12 +88,14 @@ private:
public:
static char ID; // Pass identification, replacement for typeid.
- explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {}
+ explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {
+ initializeAArch64AdvSIMDScalarPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
const char *getPassName() const override {
- return "AdvSIMD Scalar Operation Optimization";
+ return AARCH64_ADVSIMD_NAME;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -98,6 +106,9 @@ public:
char AArch64AdvSIMDScalar::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(AArch64AdvSIMDScalar, "aarch64-simd-scalar",
+ AARCH64_ADVSIMD_NAME, false, false)
+
static bool isGPR64(unsigned Reg, unsigned SubReg,
const MachineRegisterInfo *MRI) {
if (SubReg)
@@ -381,7 +392,7 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
- if (processMachineBasicBlock(I))
+ if (processMachineBasicBlock(&*I))
Changed = true;
return Changed;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index d973234..a614f55 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -45,6 +45,12 @@ BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
STATISTIC(NumSplit, "Number of basic blocks split");
STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
+namespace llvm {
+void initializeAArch64BranchRelaxationPass(PassRegistry &);
+}
+
+#define AARCH64_BR_RELAX_NAME "AArch64 branch relaxation pass"
+
namespace {
class AArch64BranchRelaxation : public MachineFunctionPass {
/// BasicBlockInfo - Information about the offset and size of a single
@@ -93,17 +99,22 @@ class AArch64BranchRelaxation : public MachineFunctionPass {
public:
static char ID;
- AArch64BranchRelaxation() : MachineFunctionPass(ID) {}
+ AArch64BranchRelaxation() : MachineFunctionPass(ID) {
+ initializeAArch64BranchRelaxationPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &MF) override;
const char *getPassName() const override {
- return "AArch64 branch relaxation pass";
+ return AARCH64_BR_RELAX_NAME;
}
};
char AArch64BranchRelaxation::ID = 0;
}
+INITIALIZE_PASS(AArch64BranchRelaxation, "aarch64-branch-relax",
+ AARCH64_BR_RELAX_NAME, false, false)
+
/// verify - check BBOffsets, BBSizes, alignment of islands
void AArch64BranchRelaxation::verify() {
#ifndef NDEBUG
@@ -131,14 +142,14 @@ void AArch64BranchRelaxation::dumpBBs() {
/// into the block immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
+ MachineFunction::iterator MBBI(MBB);
// Can't fall off end of function.
- MachineBasicBlock *NextBB = std::next(MBBI);
+ auto NextBB = std::next(MBBI);
if (NextBB == MBB->getParent()->end())
return false;
for (MachineBasicBlock *S : MBB->successors())
- if (S == NextBB)
+ if (S == &*NextBB)
return true;
return false;
@@ -216,9 +227,7 @@ AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB;
- ++MBBI;
- MF->insert(MBBI, NewBB);
+ MF->insert(++OrigBB->getIterator(), NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -421,7 +430,7 @@ bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
MBB->replaceSuccessor(FBB, NewBB);
NewBB->addSuccessor(FBB);
}
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*std::next(MachineFunction::iterator(MBB));
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< ", invert condition and change dest. to BB#"
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index 1e2d1c3..bc44bc5 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -25,30 +25,28 @@
namespace {
using namespace llvm;
-static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
- AArch64::X3, AArch64::X4, AArch64::X5,
- AArch64::X6, AArch64::X7};
-static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
- AArch64::H3, AArch64::H4, AArch64::H5,
- AArch64::H6, AArch64::H7};
-static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
- AArch64::S3, AArch64::S4, AArch64::S5,
- AArch64::S6, AArch64::S7};
-static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
- AArch64::D3, AArch64::D4, AArch64::D5,
- AArch64::D6, AArch64::D7};
-static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
- AArch64::Q3, AArch64::Q4, AArch64::Q5,
- AArch64::Q6, AArch64::Q7};
+static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2,
+ AArch64::H3, AArch64::H4, AArch64::H5,
+ AArch64::H6, AArch64::H7};
+static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2,
+ AArch64::S3, AArch64::S4, AArch64::S5,
+ AArch64::S6, AArch64::S7};
+static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2,
+ AArch64::D3, AArch64::D4, AArch64::D5,
+ AArch64::D6, AArch64::D7};
+static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
static bool finishStackBlock(SmallVectorImpl<CCValAssign> &PendingMembers,
MVT LocVT, ISD::ArgFlagsTy &ArgFlags,
CCState &State, unsigned SlotAlign) {
unsigned Size = LocVT.getSizeInBits() / 8;
- unsigned StackAlign = State.getMachineFunction()
- .getTarget()
- .getDataLayout()
- ->getStackAlignment();
+ unsigned StackAlign =
+ State.getMachineFunction().getDataLayout().getStackAlignment();
unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign);
for (auto &It : PendingMembers) {
@@ -88,7 +86,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- ArrayRef<uint16_t> RegList;
+ ArrayRef<MCPhysReg> RegList;
if (LocVT.SimpleTy == MVT::i64)
RegList = XRegList;
else if (LocVT.SimpleTy == MVT::f16)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 815ebef..388d64e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -16,7 +16,7 @@ class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
/// CCIfBigEndian - Match only if we're in big endian mode.
class CCIfBigEndian<CCAction A> :
- CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>;
+ CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
@@ -279,6 +279,23 @@ def CSR_AArch64_TLS_Darwin
FP,
(sequence "Q%u", 0, 31))>;
+// We can only handle a register pair with adjacent registers; the register pair
+// should belong to the same class as well. Since the access function on the
+// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
+// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
+def CSR_AArch64_CXX_TLS_Darwin
+ : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
+ (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
+ (sequence "D%u", 0, 31))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_AArch64_CXX_TLS_Darwin_PE
+ : CalleeSavedRegs<(add LR, FP)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
+ : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;
+
// The ELF stub used for TLS-descriptor access saves every feasible
// register. Only X0 and LR are clobbered.
def CSR_AArch64_TLS_ELF
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 06ff9af..9310ac4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -117,10 +117,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
*TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
// Insert a copy from X0 to TLSBaseAddrReg for later.
- MachineInstr *Next = I->getNextNode();
- MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- *TLSBaseAddrReg).addReg(AArch64::X0);
+ MachineInstr *Copy =
+ BuildMI(*I->getParent(), ++I->getIterator(), I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+ .addReg(AArch64::X0);
return Copy;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index efdb2e3..78c239b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -168,6 +168,8 @@ namespace llvm {
void initializeAArch64CollectLOHPass(PassRegistry &);
}
+#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
+
namespace {
struct AArch64CollectLOH : public MachineFunctionPass {
static char ID;
@@ -178,7 +180,7 @@ struct AArch64CollectLOH : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
const char *getPassName() const override {
- return "AArch64 Collect Linker Optimization Hint (LOH)";
+ return AARCH64_COLLECT_LOH_NAME;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -220,12 +222,10 @@ typedef SmallVector<unsigned, 32> MapIdToReg;
char AArch64CollectLOH::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
- "AArch64 Collect Linker Optimization Hint (LOH)", false,
- false)
+ AARCH64_COLLECT_LOH_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
- "AArch64 Collect Linker Optimization Hint (LOH)", false,
- false)
+ AARCH64_COLLECT_LOH_NAME, false, false)
/// Given a couple (MBB, reg) get the corresponding set of instruction from
/// the given "sets".
@@ -353,9 +353,17 @@ static void initReachingDef(const MachineFunction &MF,
for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
- assert(ItRegId != RegToId.end() &&
- "Sub-register of an "
- "involved register, not recorded as involved!");
+ // If this alias has not been recorded, then it is not interesting
+ // for the current analysis.
+ // We can end up in this situation because of tuple registers.
+ // E.g., let's say we are interested in S1. When we register
+ // S1, we will also register its aliases and in particular
+ // the tuple Q1_Q2.
+ // Now, when we encounter Q1_Q2, we will look through its aliases
+ // and will find that S2 is not registered.
+ if (ItRegId == RegToId.end())
+ continue;
+
BBKillSet.set(ItRegId->second);
BBGen[ItRegId->second] = &MI;
}
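To make the skip concrete, here is a minimal sketch of the same defensive lookup; markAliasDefs, RegToId and KillSet are made-up stand-ins, not the pass's real data structures.

#include <map>
#include <vector>

// Aliases that were never recorded (e.g. S2, reached only through the tuple
// Q1_Q2) are skipped; only registered ids feed the gen/kill sets.
static void markAliasDefs(const std::vector<unsigned> &Aliases,
                          const std::map<unsigned, unsigned> &RegToId,
                          std::vector<bool> &KillSet) {
  for (unsigned Alias : Aliases) {
    auto It = RegToId.find(Alias);
    if (It == RegToId.end())
      continue;
    KillSet[It->second] = true;
  }
}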
@@ -523,6 +531,8 @@ static bool isCandidateStore(const MachineInstr *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
+ case AArch64::STRBBui:
+ case AArch64::STRHHui:
case AArch64::STRBui:
case AArch64::STRHui:
case AArch64::STRWui:
@@ -884,7 +894,8 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
// If the chain is three instructions long and ldr is the second element,
// then this ldr must load from GOT, otherwise this is not a correct chain.
- if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT)
+ if (L2 && !IsL2Add &&
+ !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
continue;
SmallVector<const MachineInstr *, 3> Args;
MCLOHType Kind;
@@ -1000,7 +1011,8 @@ static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
DEBUG(dbgs() << "** Collect Involved Register\n");
for (const auto &MBB : MF) {
for (const MachineInstr &MI : MBB) {
- if (!canDefBePartOfLOH(&MI))
+ if (!canDefBePartOfLOH(&MI) &&
+ !isCandidateLoad(&MI) && !isCandidateStore(&MI))
continue;
// Process defs
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index b9e41c6..fc27bfe 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -59,6 +59,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -153,13 +154,20 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare(
case AArch64::SUBSXri:
// cmn is an alias for adds with a dead destination register.
case AArch64::ADDSWri:
- case AArch64::ADDSXri:
- if (MRI->use_empty(I->getOperand(0).getReg()))
- return I;
-
- DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
- return nullptr;
-
+ case AArch64::ADDSXri: {
+ unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm());
+ if (!I->getOperand(2).isImm()) {
+ DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n');
+ return nullptr;
+ } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) {
+ DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n');
+ return nullptr;
+ } else if (!MRI->use_empty(I->getOperand(0).getReg())) {
+ DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n');
+ return nullptr;
+ }
+ return I;
+ }
// Prevent false positive case like:
// cmp w19, #0
// cinc w0, w19, gt
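The new bail-outs are easier to read in isolation. A hedged sketch, assuming the usual 12-bit ADDS/SUBS immediate field (optionally shifted left by 12); cmpImmediateIsAdjustable and its parameters are illustrative names, not LLVM API.

#include <cstdint>

// A cmp is only worth adjusting when its immediate operand is a plain
// constant and the shifted value stays below 0xfff, which leaves room for
// the +/-1 tweak the optimizer applies later.
static bool cmpImmediateIsAdjustable(bool IsPlainImmediate, int64_t Imm,
                                     unsigned ShiftAmt) {
  if (!IsPlainImmediate)
    return false; // symbolic immediate, leave the compare alone
  return (Imm << ShiftAmt) < 0xfff;
}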
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 2b0c92f..df1320f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -353,7 +353,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
MIOperands::PhysRegInfo PRI =
MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI);
- if (PRI.Reads) {
+ if (PRI.Read) {
// The ccmp doesn't produce exactly the same flags as the original
// compare, so reject the transform if there are uses of the flags
// besides the terminators.
@@ -362,7 +362,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
return nullptr;
}
- if (PRI.Clobbers) {
+ if (PRI.Defined || PRI.Clobbered) {
DEBUG(dbgs() << "Not convertible compare: " << *I);
++NumUnknNZCVDefs;
return nullptr;
@@ -567,8 +567,8 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// All CmpBB instructions are moved into Head, and CmpBB is deleted.
// Update the CFG first.
updateTailPHIs();
- Head->removeSuccessor(CmpBB);
- CmpBB->removeSuccessor(Tail);
+ Head->removeSuccessor(CmpBB, true);
+ CmpBB->removeSuccessor(Tail, true);
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
TII->RemoveBranch(*Head);
@@ -786,13 +786,13 @@ void AArch64ConditionalCompares::updateDomTree(
// convert() removes CmpBB which was previously dominated by Head.
// CmpBB children should be transferred to Head.
MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
- for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
- MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ for (MachineBasicBlock *RemovedMBB : Removed) {
+ MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
assert(Node != HeadNode && "Cannot erase the head node");
assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
while (Node->getNumChildren())
DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
- DomTree->eraseNode(Removed[i]);
+ DomTree->eraseNode(RemovedMBB);
}
}
@@ -801,8 +801,8 @@ void
AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
if (!Loops)
return;
- for (unsigned i = 0, e = Removed.size(); i != e; ++i)
- Loops->removeBlock(Removed[i]);
+ for (MachineBasicBlock *RemovedMBB : Removed)
+ Loops->removeBlock(RemovedMBB);
}
/// Invalidate MachineTraceMetrics before if-conversion.
@@ -899,7 +899,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ MinSize = MF.getFunction()->optForMinSize();
bool Changed = false;
CmpConv.runOnMachineFunction(MF);
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 74fc167..576cf4a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -26,6 +26,12 @@ using namespace llvm;
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+namespace llvm {
+void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry &);
+}
+
+#define AARCH64_DEAD_REG_DEF_NAME "AArch64 Dead register definitions"
+
namespace {
class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
private:
@@ -35,11 +41,14 @@ private:
bool usesFrameIndex(const MachineInstr &MI);
public:
static char ID; // Pass identification, replacement for typeid.
- explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+ explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {
+ initializeAArch64DeadRegisterDefinitionsPass(
+ *PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
- const char *getPassName() const override { return "Dead register definitions"; }
+ const char *getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -49,6 +58,9 @@ public:
char AArch64DeadRegisterDefinitions::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
+ AARCH64_DEAD_REG_DEF_NAME, false, false)
+
bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
unsigned Reg, const MachineInstr &MI) {
for (const MachineOperand &MO : MI.implicit_operands())
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index c2470f7..d24e42a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -22,18 +22,26 @@
#include "llvm/Support/MathExtras.h"
using namespace llvm;
+namespace llvm {
+void initializeAArch64ExpandPseudoPass(PassRegistry &);
+}
+
+#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
+
namespace {
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
static char ID;
- AArch64ExpandPseudo() : MachineFunctionPass(ID) {}
+ AArch64ExpandPseudo() : MachineFunctionPass(ID) {
+ initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
const AArch64InstrInfo *TII;
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "AArch64 pseudo instruction expansion pass";
+ return AARCH64_EXPAND_PSEUDO_NAME;
}
private:
@@ -45,6 +53,9 @@ private:
char AArch64ExpandPseudo::ID = 0;
}
+INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
+ AARCH64_EXPAND_PSEUDO_NAME, false, false)
+
/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 0728198..0ac4b39 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -523,7 +523,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
U = C;
}
- if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
if (Ty->getAddressSpace() > 255)
// Fast instruction selection doesn't support the special
// address spaces.
@@ -969,7 +969,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
// Cannot encode an offset register and an immediate offset in the same
// instruction. Fold the immediate offset into the load/store instruction and
- // emit an additonal add to take care of the offset register.
+ // emit an additional add to take care of the offset register.
if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
RegisterOffsetNeedsLowering = true;
@@ -1058,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr,
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
// and alignment should be based on the VT.
MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI).addImm(Offset);
} else {
@@ -1178,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
}
// Check if the mul can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (isMulPowOf2(RHS)) {
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
@@ -1193,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(MulLHS);
- return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
- AArch64_AM::LSL, ShiftVal, SetFlags, WantResult);
+ ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
+ WantResult);
+ if (ResultReg)
+ return ResultReg;
}
+ }
// Check if the shift can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
@@ -1214,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftType, ShiftVal, SetFlags,
- WantResult);
+ ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftType, ShiftVal, SetFlags,
+ WantResult);
+ if (ResultReg)
+ return ResultReg;
}
}
}
+ }
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
@@ -1323,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
+ // Don't deal with undefined shifts.
+ if (ShiftImm >= RetVT.getSizeInBits())
+ return 0;
+
static const unsigned OpcTable[2][2][2] = {
{ { AArch64::SUBWrs, AArch64::SUBXrs },
{ AArch64::ADDWrs, AArch64::ADDXrs } },
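The guard added above is worth stating on its own: shifted-register ADD/SUB forms only accept shift amounts strictly smaller than the operand width, so anything larger makes the fold invalid and selection falls back to materializing the RHS. A sketch with a hypothetical helper name.

// Returns false for shift amounts that would be undefined in the shifted
// register encoding; the caller then gives up on the fold.
static bool shiftAmountIsLegal(unsigned ShiftImm, unsigned RegSizeInBits) {
  return ShiftImm < RegSizeInBits; // < 32 for W registers, < 64 for X
}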
@@ -1360,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
if (RetVT != MVT::i32 && RetVT != MVT::i64)
return 0;
+ if (ShiftImm >= 4)
+ return 0;
+
static const unsigned OpcTable[2][2][2] = {
{ { AArch64::SUBWrx, AArch64::SUBXrx },
{ AArch64::ADDWrx, AArch64::ADDXrx } },
@@ -1542,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
return ResultReg;
// Check if the mul can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (isMulPowOf2(RHS)) {
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
@@ -1558,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(MulLHS);
- return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftVal);
+ ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ if (ResultReg)
+ return ResultReg;
}
+ }
// Check if the shift can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS))
+ if (RHS->hasOneUse() && isValueAvailable(RHS)) {
if (const auto *SI = dyn_cast<ShlOperator>(RHS))
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
uint64_t ShiftVal = C->getZExtValue();
@@ -1571,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
if (!RHSReg)
return 0;
bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
- return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
- RHSIsKill, ShiftVal);
+ ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill, ShiftVal);
+ if (ResultReg)
+ return ResultReg;
}
+ }
unsigned RHSReg = getRegForValue(RHS);
if (!RHSReg)
@@ -1646,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
{ AArch64::ORRWrs, AArch64::ORRXrs },
{ AArch64::EORWrs, AArch64::EORXrs }
};
+
+ // Don't deal with undefined shifts.
+ if (ShiftImm >= RetVT.getSizeInBits())
+ return 0;
+
const TargetRegisterClass *RC;
unsigned Opc;
switch (RetVT.SimpleTy) {
@@ -2235,14 +2260,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
MIB.addImm(TestBit);
MIB.addMBB(TBB);
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
- fastEmitBranch(FBB, DbgLoc);
-
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -2257,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- AArch64CC::CondCode CC = AArch64CC::NE;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && isValueAvailable(CI)) {
// Try to optimize or fold the cmp.
@@ -2289,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
// FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
// instruction.
- CC = getCompareCC(Predicate);
+ AArch64CC::CondCode CC = getCompareCC(Predicate);
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
switch (Predicate) {
default:
@@ -2317,52 +2334,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
.addImm(CC)
.addMBB(TBB);
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
- return true;
- }
- } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
- MVT SrcVT;
- if (TI->hasOneUse() && isValueAvailable(TI) &&
- isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
- unsigned CondReg = getRegForValue(TI->getOperand(0));
- if (!CondReg)
- return false;
- bool CondIsKill = hasTrivialKill(TI->getOperand(0));
-
- // Issue an extract_subreg to get the lower 32-bits.
- if (SrcVT == MVT::i64) {
- CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
- AArch64::sub_32);
- CondIsKill = true;
- }
-
- unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
- assert(ANDReg && "Unexpected AND instruction emission failure.");
- emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
-
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB);
- CC = AArch64CC::EQ;
- }
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
-
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
@@ -2371,34 +2343,31 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
.addMBB(Target);
- // Obtain the branch weight and add the target to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- Target->getBasicBlock());
- FuncInfo.MBB->addSuccessor(Target, BranchWeight);
+ // Obtain the branch probability and add the target to the successor list.
+ if (FuncInfo.BPI) {
+ auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
+ BI->getParent(), Target->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(Target, BranchProbability);
+ } else
+ FuncInfo.MBB->addSuccessorWithoutProb(Target);
return true;
- } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- unsigned CondReg = getRegForValue(BI->getCondition());
- if (!CondReg)
- return false;
-
- // Emit the branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
+ } else {
+ AArch64CC::CondCode CC = AArch64CC::NE;
+ if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (!CondReg)
+ return false;
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
+ // Emit the branch.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- return true;
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
+ }
}
unsigned CondReg = getRegForValue(BI->getCondition());
@@ -2406,32 +2375,22 @@ bool AArch64FastISel::selectBranch(const Instruction *I) {
return false;
bool CondRegIsKill = hasTrivialKill(BI->getCondition());
- // We've been divorced from our compare! Our block was split, and
- // now our compare lives in a predecessor block. We musn't
- // re-compare here, as the children of the compare aren't guaranteed
- // live across the block boundary (we *could* check for this).
- // Regardless, the compare has been done in the predecessor block,
- // and it left a value for us in a virtual register. Ergo, we test
- // the one-bit value left in the virtual register.
- emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0);
-
+ // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
+ unsigned Opcode = AArch64::TBNZW;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
- CC = AArch64CC::EQ;
+ Opcode = AArch64::TBZW;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
+ const MCInstrDesc &II = TII.get(Opcode);
+ unsigned ConstrainedCondReg
+ = constrainOperandRegClass(II, CondReg, II.getNumDefs());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
+ .addImm(0)
.addMBB(TBB);
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TBB, BranchWeight);
-
- fastEmitBranch(FBB, DbgLoc);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
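With the change above, a plain i1 condition is branched on by testing bit 0 directly. A simplified sketch of the opcode choice; BitBranch and pickBitBranch are made-up names standing in for the TBZW/TBNZW selection.

#include <utility>

enum class BitBranch { TBZ, TBNZ };

// Branch on bit 0 of the i32 register holding the i1 condition. If the true
// block is the fall-through block, invert the test and swap the targets.
static std::pair<BitBranch, bool /*SwapTargets*/>
pickBitBranch(bool TrueBlockIsLayoutSuccessor) {
  return TrueBlockIsLayoutSuccessor ? std::make_pair(BitBranch::TBZ, true)
                                    : std::make_pair(BitBranch::TBNZ, false);
}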
@@ -2447,8 +2406,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
// Make sure the CFG is up-to-date.
- for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
- FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
+ for (auto *Succ : BI->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
return true;
}
@@ -2456,6 +2415,10 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
bool AArch64FastISel::selectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
+ // Vectors of i1 are weird: bail out.
+ if (CI->getType()->isVectorTy())
+ return false;
+
// Try to optimize or fold the cmp.
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
unsigned ResultReg = 0;
@@ -2954,8 +2917,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
.addImm(NumBytes);
// Process the args.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (CCValAssign &VA : ArgLocs) {
const Value *ArgVal = CLI.OutVals[VA.getValNo()];
MVT ArgVT = OutVTs[VA.getValNo()];
@@ -3018,8 +2980,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(Addr.getOffset()),
- MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
+ MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
if (!emitStore(ArgVT, ArgReg, Addr, MMO))
return false;
@@ -3318,8 +3280,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
return false;
// Make sure nothing is in the way
- BasicBlock::const_iterator Start = I;
- BasicBlock::const_iterator End = II;
+ BasicBlock::const_iterator Start(I);
+ BasicBlock::const_iterator End(II);
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
// We only expect extractvalue instructions between the intrinsic and the
// instruction to be selected.
@@ -3684,6 +3646,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (F.isVarArg())
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
@@ -3763,8 +3728,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(AArch64::RET_ReallyLR));
- for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
- MIB.addReg(RetRegs[i], RegState::Implicit);
+ for (unsigned RetReg : RetRegs)
+ MIB.addReg(RetReg, RegState::Implicit);
return true;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a76473f..11ae800 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -72,9 +72,9 @@
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
-// * A base pointer is definitly needed when there are both VLAs and local
+// * A base pointer is definitely needed when there are both VLAs and local
// variables with more-than-default alignment requirements.
-// * A frame pointer is definitly needed when there are local variables with
+// * A frame pointer is definitely needed when there are local variables with
// more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
@@ -216,11 +216,11 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
if (CSI.empty())
return;
- const DataLayout *TD = MF.getTarget().getDataLayout();
+ const DataLayout &TD = MF.getDataLayout();
bool HasFP = hasFP(MF);
// Calculate amount of bytes used for return address storing.
- int stackGrowth = -TD->getPointerSize(0);
+ int stackGrowth = -TD.getPointerSize(0);
// Calculate offsets.
int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
@@ -280,14 +280,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
bool HasFP = hasFP(MF);
- DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
@@ -354,7 +357,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes && NeedsRealignment) {
// Use the first callee-saved register as a scratch register.
scratchSPReg = AArch64::X9;
- MF.getRegInfo().setPhysRegUsed(scratchSPReg);
}
// If we're a leaf function, try using the red zone.
@@ -400,8 +402,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}
if (needsFrameMoves) {
- const DataLayout *TD = MF.getTarget().getDataLayout();
- const int StackGrowth = -TD->getPointerSize(0);
+ const DataLayout &TD = MF.getDataLayout();
+ const int StackGrowth = -TD.getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
@@ -513,33 +515,33 @@ static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
return false;
}
-static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
+/// Checks whether the given instruction restores callee save registers
+/// and if so returns how many.
+static unsigned getNumCSRestores(MachineInstr &MI, const MCPhysReg *CSRegs) {
unsigned RtIdx = 0;
- if (MI->getOpcode() == AArch64::LDPXpost ||
- MI->getOpcode() == AArch64::LDPDpost)
+ switch (MI.getOpcode()) {
+ case AArch64::LDPXpost:
+ case AArch64::LDPDpost:
RtIdx = 1;
-
- if (MI->getOpcode() == AArch64::LDPXpost ||
- MI->getOpcode() == AArch64::LDPDpost ||
- MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
- if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
- !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
- MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
- return false;
- return true;
+ // FALLTHROUGH
+ case AArch64::LDPXi:
+ case AArch64::LDPDi:
+ if (!isCalleeSavedRegister(MI.getOperand(RtIdx).getReg(), CSRegs) ||
+ !isCalleeSavedRegister(MI.getOperand(RtIdx + 1).getReg(), CSRegs) ||
+ MI.getOperand(RtIdx + 2).getReg() != AArch64::SP)
+ return 0;
+ return 2;
}
-
- return false;
+ return 0;
}
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const AArch64InstrInfo *TII =
- static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
- const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
bool IsTailCallReturn = false;
if (MBB.end() != MBBI) {
@@ -585,7 +587,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// ---------------------| --- |
// | | | |
// | CalleeSavedReg | | |
- // | (NumRestores * 16) | | |
+ // | (NumRestores * 8) | | |
// | | | |
// ---------------------| | NumBytes
// | | StackSize (StackAdjustUp)
@@ -606,17 +608,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
- if (LastPopI != MBB.begin()) {
- do {
- ++NumRestores;
- --LastPopI;
- } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
- if (!isCSRestore(LastPopI, CSRegs)) {
+ MachineBasicBlock::iterator Begin = MBB.begin();
+ while (LastPopI != Begin) {
+ --LastPopI;
+ unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
+ NumRestores += Restores;
+ if (Restores == 0) {
++LastPopI;
- --NumRestores;
+ break;
}
}
- NumBytes -= NumRestores * 16;
+ NumBytes -= NumRestores * 8;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
if (!hasFP(MF)) {
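The new accounting is easier to see in isolation: each LDPX/LDPD restore reloads two 8-byte callee-saved registers, and NumRestores now counts registers rather than instructions, hence NumRestores * 8. A sketch; restoredBytes is an illustrative name.

// getNumCSRestores returns 2 per matching LDP, so summing it counts
// registers; at 8 bytes each this gives the same total the old
// instructions-times-16 computation produced.
static unsigned restoredBytes(unsigned NumLdpInstructions) {
  unsigned NumRestoredRegs = 2 * NumLdpInstructions;
  return NumRestoredRegs * 8;
}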
@@ -634,15 +636,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// be able to save any instructions.
if (NumBytes || MFI->hasVarSizedObjects())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
- -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
-}
-
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index.
-int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
+ -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -739,9 +733,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- if (MI != MBB.end())
- DL = MI->getDebugLoc();
-
for (unsigned i = 0; i < Count; i += 2) {
unsigned idx = Count - i - 2;
unsigned Reg1 = CSI[idx].getReg();
@@ -911,7 +902,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned NumFPRSpilled = 0;
bool ExtraCSSpill = false;
bool CanEliminateFrame = true;
- DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
+ DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Check pairs of consecutive callee-saved registers.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 731f031..427afdf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -37,7 +37,6 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
int resolveFrameIndexReference(const MachineFunction &MF, int FI,
@@ -61,6 +60,11 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
+
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return true;
+ }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 772e894..6c86888 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -34,7 +34,6 @@ using namespace llvm;
namespace {
class AArch64DAGToDAGISel : public SelectionDAGISel {
- AArch64TargetMachine &TM;
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -45,7 +44,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
public:
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
+ : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
ForCodeSize(false) {}
const char *getPassName() const override {
@@ -53,9 +52,7 @@ public:
}
bool runOnMachineFunction(MachineFunction &MF) override {
- ForCodeSize =
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ ForCodeSize = MF.getFunction()->optForSize();
Subtarget = &MF.getSubtarget<AArch64Subtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -79,6 +76,21 @@ public:
bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
return SelectShiftedRegister(N, true, Reg, Shift);
}
+ bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
+ }
bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
return SelectAddrModeIndexed(N, 1, Base, OffImm);
}
@@ -153,8 +165,7 @@ public:
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
-
- SDNode *SelectLIBM(SDNode *N);
+ SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);
SDNode *SelectReadRegister(SDNode *N);
SDNode *SelectWriteRegister(SDNode *N);
@@ -165,6 +176,8 @@ public:
private:
bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
SDValue &Shift);
+ bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &OffImm);
bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
SDValue &OffImm);
bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
@@ -422,7 +435,7 @@ static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
return true;
}
-// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a
+// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
SDValue &LaneOp, int &LaneIdx) {
@@ -572,7 +585,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
}
// AArch64 mandates that the RHS of the operation must use the smallest
- // register classs that could contain the size being extended from. Thus,
+ // register class that could contain the size being extended from. Thus,
// if we're folding a (sext i8), we need the RHS to be a GPR32, even though
// there might not be an actual 32-bit value in the program. We can
// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
@@ -587,7 +600,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
-/// leads to duplaicated ADRP instructions.
+/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
for (auto Use : N->uses()) {
if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
@@ -604,6 +617,51 @@ static bool isWorthFoldingADDlow(SDValue N) {
return true;
}
+/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
+/// immediate" address. The "Size" argument is the size in bytes of the memory
+/// reference, which determines the scale.
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
+ SDValue &Base,
+ SDValue &OffImm) {
+ SDLoc dl(N);
+ const DataLayout &DL = CurDAG->getDataLayout();
+ const TargetLowering *TLI = getTargetLowering();
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+ }
+
+ // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+ // offset selected here doesn't support labels/immediates, only base+offset.
+
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t RHSC = RHS->getSExtValue();
+ unsigned Scale = Log2_32(Size);
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
+ RHSC < (0x40 << Scale)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
+ return true;
+ }
+ }
+ }
+
+ // Base only. The address will be materialized into a register before
+ // the memory is accessed.
+ // add x0, Xbase, #offset
+ // stp x1, x2, [x0]
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+ return true;
+}
+
/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address. The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
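A standalone sketch of the range check SelectAddrModeIndexed7S performs; fitsScaledImm7 is a hypothetical helper, but the bounds mirror the code above: the offset must be a multiple of the access size and, after scaling, fit a signed 7-bit field.

#include <cstdint>

// True if Offset is encodable as a scaled signed 7-bit immediate for an
// access of Size bytes (the LDP/STP-style mode targeted above).
static bool fitsScaledImm7(int64_t Offset, unsigned Size) {
  if (Offset % static_cast<int64_t>(Size) != 0)
    return false;
  int64_t Scaled = Offset / static_cast<int64_t>(Size);
  return Scaled >= -64 && Scaled <= 63;
}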
@@ -867,7 +925,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
if (isa<ConstantSDNode>(RHS)) {
int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
unsigned Scale = Log2_32(Size);
- // Skip the immediate can be seleced by load/store addressing mode.
+ // Skip the immediate can be selected by load/store addressing mode.
// Also skip the immediate can be encoded by a single ADD (SUB is also
// checked by using -ImmOff).
if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
@@ -1034,6 +1092,8 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
// it into an i64.
DstVT = MVT::i32;
}
+ } else if (VT == MVT::f16) {
+ Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
} else if (VT == MVT::f32) {
Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
} else if (VT == MVT::f64 || VT.is64BitVector()) {
@@ -1222,8 +1282,8 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SDValue SuperReg = SDValue(Ld, 0);
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
- AArch64::qsub3 };
+ static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
for (unsigned i = 0; i < NumVecs; ++i) {
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
if (Narrow)
@@ -1275,8 +1335,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
} else {
EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
- AArch64::qsub3 };
+ static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
for (unsigned i = 0; i < NumVecs; ++i) {
SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
SuperReg);
@@ -1420,7 +1480,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
// The resulting code will be at least as good as the original one
// plus it may expose more opportunities for bitfield insert pattern.
// FIXME: Currently we limit this to the bigger pattern, because
- // some optimizations expect AND and not UBFM
+ // some optimizations expect AND and not UBFM.
Opd0 = N->getOperand(0);
} else
return false;
@@ -1852,6 +1912,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
+ bool BiggerPattern,
SDValue &Src, int &ShiftAmount,
int &MaskWidth) {
EVT VT = Op.getValueType();
@@ -1874,6 +1935,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
Op = Op.getOperand(0);
}
+ // Don't match if the SHL has more than one use, since then we'll end up
+ // generating SHL+UBFIZ instead of just keeping SHL+AND.
+ if (!BiggerPattern && !Op.hasOneUse())
+ return false;
+
uint64_t ShlImm;
if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
return false;
@@ -1887,7 +1953,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
// BFI encompasses sufficiently many nodes that it's worth inserting an extra
// LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
- // amount.
+ // amount. BiggerPattern is true when this pattern is being matched for BFI,
+ // BiggerPattern is false when this pattern is being matched for UBFIZ, in
+ // which case it is not profitable to insert an extra shift.
+ if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
+ return false;
Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
return true;
@@ -1904,7 +1974,8 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
SDValue &Src, unsigned &ImmR,
- unsigned &ImmS, SelectionDAG *CurDAG) {
+ unsigned &ImmS, const APInt &UsefulBits,
+ SelectionDAG *CurDAG) {
assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
// Set Opc
@@ -1918,23 +1989,30 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
// Because of simplify-demanded-bits in DAGCombine, involved masks may not
// have the expected shape. Try to undo that.
- APInt UsefulBits;
- getUsefulBits(SDValue(N, 0), UsefulBits);
unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
- // OR is commutative, check both possibilities (does llvm provide a
- // way to do that directely, e.g., via code matcher?)
- SDValue OrOpd1Val = N->getOperand(1);
- SDNode *OrOpd0 = N->getOperand(0).getNode();
- SDNode *OrOpd1 = N->getOperand(1).getNode();
- for (int i = 0; i < 2;
- ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
+ // OR is commutative, check all combinations of operand order and values of
+ // BiggerPattern, i.e.
+ // Opd0, Opd1, BiggerPattern=false
+ // Opd1, Opd0, BiggerPattern=false
+ // Opd0, Opd1, BiggerPattern=true
+ // Opd1, Opd0, BiggerPattern=true
+ // Several of these combinations may match, so check with BiggerPattern=false
+ // first since that will produce better results by matching more instructions
+ // and/or inserting fewer extra instructions.
+ for (int I = 0; I < 4; ++I) {
+
+ bool BiggerPattern = I / 2;
+ SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
+ SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
+ SDNode *OrOpd1 = OrOpd1Val.getNode();
+
unsigned BFXOpc;
int DstLSB, Width;
if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
- NumberOfIgnoredLowBits, true)) {
+ NumberOfIgnoredLowBits, BiggerPattern)) {
// Check that the returned opcode is compatible with the pattern,
// i.e., same type and zero extended (U and not S)
if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
@@ -1952,8 +2030,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
// If the mask on the insertee is correct, we have a BFXIL operation. We
// can share the ImmR and ImmS values from the already-computed UBFM.
- } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
- DstLSB, Width)) {
+ } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
+ BiggerPattern,
+ Src, DstLSB, Width)) {
ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
ImmS = Width - 1;
} else
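The four-way loop above folds both operand orders and both BiggerPattern values into one index. A tiny standalone sketch of the enumeration order it produces, matching the comment that the BiggerPattern=false combinations are tried first.

#include <cstdio>

int main() {
  // Prints (Opd0, Opd1, false), (Opd1, Opd0, false),
  //        (Opd0, Opd1, true),  (Opd1, Opd0, true).
  for (int I = 0; I < 4; ++I) {
    bool BiggerPattern = I / 2;
    std::printf("(Opd%d, Opd%d, %s)\n", I % 2, (I + 1) % 2,
                BiggerPattern ? "true" : "false");
  }
  return 0;
}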
@@ -2003,11 +2082,18 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
unsigned Opc;
unsigned LSB, MSB;
SDValue Opd0, Opd1;
+ EVT VT = N->getValueType(0);
+ APInt NUsefulBits;
+ getUsefulBits(SDValue(N, 0), NUsefulBits);
+
+ // If none of the bits are useful, just return UNDEF.
+ if (!NUsefulBits)
+ return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT);
- if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
+ if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits,
+ CurDAG))
return nullptr;
- EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue Ops[] = { Opd0,
Opd1,
@@ -2016,58 +2102,37 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
-SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
+/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
+/// equivalent of a left shift by a constant amount followed by an and masking
+/// out a contiguous set of bits.
+SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
+ if (N->getOpcode() != ISD::AND)
+ return nullptr;
+
EVT VT = N->getValueType(0);
- unsigned Variant;
unsigned Opc;
- unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
-
- if (VT == MVT::f32) {
- Variant = 0;
- } else if (VT == MVT::f64) {
- Variant = 1;
- } else
- return nullptr; // Unrecognized argument type. Fall back on default codegen.
-
- // Pick the FRINTX variant needed to set the flags.
- unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
- switch (N->getOpcode()) {
- default:
- return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
- case ISD::FCEIL: {
- unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
- Opc = FRINTPOpcs[Variant];
- break;
- }
- case ISD::FFLOOR: {
- unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
- Opc = FRINTMOpcs[Variant];
- break;
- }
- case ISD::FTRUNC: {
- unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
- Opc = FRINTZOpcs[Variant];
- break;
- }
- case ISD::FROUND: {
- unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
- Opc = FRINTAOpcs[Variant];
- break;
- }
- }
+ if (VT == MVT::i32)
+ Opc = AArch64::UBFMWri;
+ else if (VT == MVT::i64)
+ Opc = AArch64::UBFMXri;
+ else
+ return nullptr;
- SDLoc dl(N);
- SDValue In = N->getOperand(0);
- SmallVector<SDValue, 2> Ops;
- Ops.push_back(In);
+ SDValue Op0;
+ int DstLSB, Width;
+ if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
+ Op0, DstLSB, Width))
+ return nullptr;
- if (!TM.Options.UnsafeFPMath) {
- SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
- Ops.push_back(SDValue(FRINTX, 1));
- }
+ // ImmR is the rotate right amount.
+ unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
+ // ImmS is the most significant bit of the source to be moved.
+ unsigned ImmS = Width - 1;
- return CurDAG->getMachineNode(Opc, dl, VT, Ops);
+ SDLoc DL(N);
+ SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
+ CurDAG->getTargetConstant(ImmS, DL, VT)};
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
}
bool
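The immediate computation in SelectBitfieldInsertInZeroOp is standard UBFM arithmetic; a minimal sketch, with ubfizImms as an illustrative name.

// For (and (shl X, DstLSB), <Width contiguous bits at DstLSB>), UBFIZ is the
// UBFM alias with ImmR the right-rotate amount and ImmS the most significant
// source bit moved. RegSize is 32 or 64.
struct UbfmImms { unsigned ImmR, ImmS; };

static UbfmImms ubfizImms(unsigned DstLSB, unsigned Width, unsigned RegSize) {
  return {(RegSize - DstLSB) % RegSize, Width - 1};
}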
@@ -2119,7 +2184,7 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
// into a single value to be used in the MRS/MSR instruction.
static int getIntOperandFromRegisterString(StringRef RegString) {
SmallVector<StringRef, 5> Fields;
- RegString.split(Fields, ":");
+ RegString.split(Fields, ':');
if (Fields.size() == 1)
return -1;
@@ -2206,7 +2271,15 @@ SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
assert (isa<ConstantSDNode>(N->getOperand(2))
&& "Expected a constant integer expression.");
uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other,
+ unsigned State;
+ if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
+ assert(Immed < 2 && "Bad imm");
+ State = AArch64::MSRpstateImm1;
+ } else {
+ assert(Immed < 16 && "Bad imm");
+ State = AArch64::MSRpstateImm4;
+ }
+ return CurDAG->getMachineNode(State, DL, MVT::Other,
CurDAG->getTargetConstant(Reg, DL, MVT::i32),
CurDAG->getTargetConstant(Immed, DL, MVT::i16),
N->getOperand(0));
@@ -2279,6 +2352,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
case ISD::SRA:
if (SDNode *I = SelectBitfieldExtractOp(Node))
return I;
+ if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
+ return I;
break;
case ISD::OR:
@@ -2802,6 +2877,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
}
}
+ break;
}
case AArch64ISD::LD2post: {
if (VT == MVT::v8i8)
@@ -3214,14 +3290,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
break;
}
-
- case ISD::FCEIL:
- case ISD::FFLOOR:
- case ISD::FTRUNC:
- case ISD::FROUND:
- if (SDNode *I = SelectLIBM(Node))
- return I;
- break;
}
// Select the default instruction
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3e8f46c..9f5beff 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -40,23 +40,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
-namespace {
-enum AlignMode {
- StrictAlign,
- NoStrictAlign
-};
-}
-
-static cl::opt<AlignMode>
-Align(cl::desc("Load/store alignment support"),
- cl::Hidden, cl::init(NoStrictAlign),
- cl::values(
- clEnumValN(StrictAlign, "aarch64-strict-align",
- "Disallow all unaligned memory accesses"),
- clEnumValN(NoStrictAlign, "aarch64-no-strict-align",
- "Allow unaligned memory accesses"),
- clEnumValEnd));
-
// Place holder until extr generation is tested fully.
static cl::opt<bool>
EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
@@ -76,6 +59,9 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
+/// Value type used for condition codes.
+static const MVT MVT_CC = MVT::i32;
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -210,11 +196,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- // Exception handling.
- // FIXME: These are guesses. Has this been defined yet?
- setExceptionPointerRegister(AArch64::X0);
- setExceptionSelectorRegister(AArch64::X1);
-
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
@@ -234,6 +215,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ }
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
@@ -252,6 +237,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ }
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
@@ -315,6 +304,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
@@ -403,10 +394,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
+ setOperationAction(ISD::FMINNUM, Ty, Legal);
+ setOperationAction(ISD::FMAXNUM, Ty, Legal);
+ setOperationAction(ISD::FMINNAN, Ty, Legal);
+ setOperationAction(ISD::FMAXNAN, Ty, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
+ // This requires the Performance Monitors extension.
+ if (Subtarget->hasPerfMon())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+
if (Subtarget->isTargetMachO()) {
// For iOS, we don't want to the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret to avoid memory
@@ -456,12 +456,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
+ setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
+ setIndexedStoreAction(im, MVT::f16, Legal);
}
// Trap.
@@ -479,6 +481,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
+
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
@@ -487,16 +493,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
+ if (Subtarget->supportsAddressTopByteIgnored())
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
@@ -512,10 +520,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setMinFunctionAlignment(2);
- RequireStrictAlign = (Align == StrictAlign);
-
setHasExtractBitsInsn(true);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
@@ -646,6 +654,9 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand);
+
+ // But we do support custom-lowering for FCOPYSIGN.
+ setOperationAction(ISD::FCOPYSIGN, VT.getSimpleVT(), Custom);
}
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
@@ -686,6 +697,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+ // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!).
+ if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16)
+ for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
+ ISD::FMINNUM, ISD::FMAXNUM})
+ setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -730,7 +747,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
break;
}
case ISD::INTRINSIC_W_CHAIN: {
- ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
@@ -780,6 +797,34 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
return MVT::i64;
}
+bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align,
+ bool *Fast) const {
+ if (Subtarget->requiresStrictAlign())
+ return false;
+
+ // FIXME: This is mostly true for Cyclone, but not necessarily others.
+ if (Fast) {
+ // FIXME: Define an attribute for slow unaligned accesses instead of
+ // relying on the CPU type as a proxy.
+ // On Cyclone, unaligned 128-bit stores are slow.
+ *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
+ // See comments in performSTORECombine() for more details about
+ // these conditions.
+
+ // Code that uses clang vector extensions can mark that it
+ // wants unaligned accesses to be treated as fast by
+ // underspecifying alignment to be 1 or 2.
+ Align <= 2 ||
+
+ // Disregard v2i64. Memcpy lowering produces those and splitting
+ // them regresses performance on micro-benchmarks and olden/bh.
+ VT == MVT::v2i64;
+ }
+ return true;
+}
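
For review convenience only: the *Fast expression above mixes a long || chain with interleaved comments, so here is the same heuristic restated as a tiny standalone predicate (the name and plain-bool parameters are mine, not the patch's), with a few spot checks of the intended behaviour:

#include <cassert>

// Same logic as the *Fast computation in allowsMisalignedMemoryAccesses:
// treat an unaligned access as fast unless we are on Cyclone AND it is a
// 16-byte access AND the stated alignment is > 2 AND the type is not v2i64.
static bool unalignedIsFast(bool isCyclone, unsigned storeSizeBytes,
                            unsigned align, bool isV2i64) {
  return !isCyclone || storeSizeBytes != 16 || align <= 2 || isV2i64;
}

int main() {
  assert(!unalignedIsFast(true, 16, 4, false)); // slow: Cyclone 128-bit store
  assert(unalignedIsFast(true, 16, 1, false));  // fast: caller said align <= 2
  assert(unalignedIsFast(true, 16, 4, true));   // fast: v2i64 is exempted
  assert(unalignedIsFast(false, 16, 4, false)); // fast: not Cyclone
  return 0;
}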
+
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
@@ -809,9 +854,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
+ case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
+ case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
+ case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
- case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
- case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
case AArch64ISD::DUP: return "AArch64ISD::DUP";
case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
@@ -931,8 +977,7 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI->getDebugLoc();
- MachineFunction::iterator It = MBB;
- ++It;
+ MachineFunction::iterator It = ++MBB->getIterator();
unsigned DestReg = MI->getOperand(0).getReg();
unsigned IfTrueReg = MI->getOperand(1).getReg();
@@ -1141,8 +1186,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
- if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) &&
- cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 &&
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
@@ -1156,8 +1200,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
// the absence of information about op2.
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
- } else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
!isUnsignedIntSetCC(CC)) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
@@ -1167,14 +1210,230 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
LHS = LHS.getOperand(0);
}
- return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
+ return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
+/// \defgroup AArch64CCMP CMP;CCMP matching
+///
+/// These functions deal with the formation of CMP;CCMP;... sequences.
+/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
+/// a comparison. They set the NZCV flags to a predefined value if their
+/// predicate is false. This allows expressing arbitrary conjunctions, for
+/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
+/// expressed as:
+/// cmp A
+/// ccmp B, inv(CB), CA
+/// check for CB flags
+///
+/// In general we can create code for arbitrary "... (and (and A B) C)"
+/// sequences. We can also implement some "or" expressions, because "(or A B)"
+/// is equivalent to "not (and (not A) (not B))" and we can implement some
+/// negation operations:
+/// We can negate the results of a single comparison by inverting the flags
+/// used when the predicate fails and inverting the flags tested in the next
+/// instruction; We can also negate the results of the whole previous
+/// conditional compare sequence by inverting the flags tested in the next
+/// instruction. However there is no way to negate the result of a partial
+/// sequence.
+///
+/// Therefore, on encountering an "or" expression, we can negate the subtree on
+/// one side and have to be able to push the negate to the leaves of the subtree
+/// on the other side (see also the comments in the code). As a complete example:
+/// "or (or (setCA (cmp A)) (setCB (cmp B)))
+/// (and (setCC (cmp C)) (setCD (cmp D)))"
+/// is transformed to
+/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
+/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
+/// and implemented as:
+/// cmp C
+/// ccmp D, inv(CD), CC
+/// ccmp A, CA, inv(CD)
+/// ccmp B, CB, inv(CA)
+/// check for CB flags
+/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
+/// by conditional compare sequences.
+/// @{
+
+/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
+static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue CCOp,
+ SDValue Condition, unsigned NZCV,
+ SDLoc DL, SelectionDAG &DAG) {
+ unsigned Opcode = 0;
+ if (LHS.getValueType().isFloatingPoint())
+ Opcode = AArch64ISD::FCCMP;
+ else if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubOp0 = RHS.getOperand(0);
+ if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ // See emitComparison() on why we can only do this for SETEQ and SETNE.
+ Opcode = AArch64ISD::CCMN;
+ RHS = RHS.getOperand(1);
+ }
+ }
+ if (Opcode == 0)
+ Opcode = AArch64ISD::CCMP;
+
+ SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
+ return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
+}
+
+/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
+/// CanPushNegate is set to true if we can push a negate operation through
+/// the tree in a way that we are left with AND operations and negate operations
+/// at the leaves only, i.e. "not (or (or x y) z)" can be changed to
+/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
+/// brought into such a form.
+static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate,
+ unsigned Depth = 0) {
+ if (!Val.hasOneUse())
+ return false;
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ CanPushNegate = true;
+ return true;
+ }
+ // Protect against stack overflow.
+ if (Depth > 15)
+ return false;
+ if (Opcode == ISD::AND || Opcode == ISD::OR) {
+ SDValue O0 = Val->getOperand(0);
+ SDValue O1 = Val->getOperand(1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1))
+ return false;
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1))
+ return false;
+ // We cannot push a negate through an AND operation (it would become an OR);
+ // we can, however, change a (not (or x y)) to (and (not x) (not y)) if we can
+ // push the negate through the x/y subtrees.
+ CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR;
+ return true;
+ }
+ return false;
+}
+
+/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
+/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
+/// Tries to transform the given i1 producing node @p Val to a series of compare
+/// and conditional compare operations. @returns an NZCV flags producing node
+/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
+/// the transformation was not possible.
+/// On recursive invocations @p PushNegate may be set to true to have negation
+/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
+/// for the comparisons in the current subtree; @p Depth limits the search
+/// depth to avoid stack overflow.
+static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
+ AArch64CC::CondCode &OutCC, bool PushNegate = false,
+ SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL,
+ unsigned Depth = 0) {
+ // We're at a tree leaf, produce a conditional comparison operation.
+ unsigned Opcode = Val->getOpcode();
+ if (Opcode == ISD::SETCC) {
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
+ bool isInteger = LHS.getValueType().isInteger();
+ if (PushNegate)
+ CC = getSetCCInverse(CC, isInteger);
+ SDLoc DL(Val);
+ // Determine OutCC and handle FP special case.
+ if (isInteger) {
+ OutCC = changeIntCCToAArch64CC(CC);
+ } else {
+ assert(LHS.getValueType().isFloatingPoint());
+ AArch64CC::CondCode ExtraCC;
+ changeFPCCToAArch64CC(CC, OutCC, ExtraCC);
+ // Surprisingly, some floating point conditions can't be tested with a
+ // single condition code. Construct an additional comparison in this case.
+ // See comment below on how we deal with OR conditions.
+ if (ExtraCC != AArch64CC::AL) {
+ SDValue ExtraCmp;
+ if (!CCOp.getNode())
+ ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
+ else {
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
+ // Note that we want the inverse of ExtraCC, so the NZCV is not inverted.
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC);
+ ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp,
+ NZCV, DL, DAG);
+ }
+ CCOp = ExtraCmp;
+ Predicate = AArch64CC::getInvertedCondCode(ExtraCC);
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ }
+ }
+
+ // Produce a normal comparison if we are first in the chain
+ if (!CCOp.getNode())
+ return emitComparison(LHS, RHS, CC, DL, DAG);
+ // Otherwise produce a ccmp.
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC);
+ AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
+ return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL,
+ DAG);
+ } else if ((Opcode != ISD::AND && Opcode != ISD::OR) || !Val->hasOneUse())
+ return SDValue();
+
+ assert((Opcode == ISD::OR || !PushNegate)
+ && "Can only push negate through OR operation");
+
+ // Check if both sides can be transformed.
+ SDValue LHS = Val->getOperand(0);
+ SDValue RHS = Val->getOperand(1);
+ bool CanPushNegateL;
+ if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1))
+ return SDValue();
+ bool CanPushNegateR;
+ if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1))
+ return SDValue();
+
+ // Do we need to negate our operands?
+ bool NegateOperands = Opcode == ISD::OR;
+ // We can negate the results of all previous operations by inverting the
+ // predicate flags, giving us a free negation for one side. For the other side
+ // we need to be able to push the negation to the leaves of the tree.
+ if (NegateOperands) {
+ if (!CanPushNegateL && !CanPushNegateR)
+ return SDValue();
+ // Order the side where we can push the negate through to LHS.
+ if (!CanPushNegateL && CanPushNegateR)
+ std::swap(LHS, RHS);
+ } else {
+ bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
+ bool NeedsNegOutR = RHS->getOpcode() == ISD::OR;
+ if (NeedsNegOutL && NeedsNegOutR)
+ return SDValue();
+ // Order the side where we need to negate the output flags to RHS so it
+ // gets emitted first.
+ if (NeedsNegOutL)
+ std::swap(LHS, RHS);
+ }
+
+ // Emit RHS. If we want to negate the tree we only need to push a negate
+ // through if we are already in a PushNegate case, otherwise we can negate
+ // the "flags to test" afterwards.
+ AArch64CC::CondCode RHSCC;
+ SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate,
+ CCOp, Predicate, Depth+1);
+ if (NegateOperands && !PushNegate)
+ RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
+ // Emit LHS. We must push the negate through if we need to negate it.
+ SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands,
+ CmpR, RHSCC, Depth+1);
+ // If we transformed an OR to an AND then we have to negate the result
+ // (or absorb a PushNegate resulting in a double negation).
+ if (Opcode == ISD::OR && !PushNegate)
+ OutCC = AArch64CC::getInvertedCondCode(OutCC);
+ return CmpL;
+}
+
+/// @}
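
Editor's note on the @ref AArch64CCMP group above: the one non-obvious legality rule is that an OR lets us negate one operand for free (by inverting the tested flags) while the other operand must absorb a pushed-down negate, which is just De Morgan applied recursively. The rewrite quoted in that comment block can be checked exhaustively with plain C++, no LLVM types needed:

#include <cassert>

int main() {
  // Exhaustively check the rewrite from the comment block:
  //   or (or A B) (and C D)
  //     == not (and (not (and C D)) (and (not A) (not B)))
  // which is what lets the whole expression be emitted as one CMP;CCMP chain.
  for (int m = 0; m < 16; ++m) {
    bool A = m & 1, B = m & 2, C = m & 4, D = m & 8;
    bool Orig = (A || B) || (C && D);
    bool Rewritten = !(!(C && D) && (!A && !B));
    assert(Orig == Rewritten);
  }
  return 0;
}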
+
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
- SDValue Cmp;
- AArch64CC::CondCode AArch64CC;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
@@ -1229,47 +1488,56 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
}
- // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
- // For the i8 operand, the largest immediate is 255, so this can be easily
- // encoded in the compare instruction. For the i16 operand, however, the
- // largest immediate cannot be encoded in the compare.
- // Therefore, use a sign extending load and cmn to avoid materializing the -1
- // constant. For example,
- // movz w1, #65535
- // ldrh w0, [x0, #0]
- // cmp w0, w1
- // >
- // ldrsh w0, [x0, #0]
- // cmn w0, #1
- // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
- // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure
- // both the LHS and RHS are truely zero extended and to make sure the
- // transformation is profitable.
+ SDValue Cmp;
+ AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
- if ((cast<ConstantSDNode>(RHS)->getZExtValue() >> 16 == 0) &&
- isa<LoadSDNode>(LHS)) {
- if (cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
- cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
- LHS.getNode()->hasNUsesOfValue(1, 0)) {
- int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
- if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
- SDValue SExt =
- DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
- DAG.getValueType(MVT::i16));
- Cmp = emitComparison(SExt,
- DAG.getConstant(ValueofRHS, dl,
- RHS.getValueType()),
- CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
- return Cmp;
- }
+ const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
+
+ // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
+ // For the i8 operand, the largest immediate is 255, so this can be easily
+ // encoded in the compare instruction. For the i16 operand, however, the
+ // largest immediate cannot be encoded in the compare.
+ // Therefore, use a sign extending load and cmn to avoid materializing the
+ // -1 constant. For example,
+ // movz w1, #65535
+ // ldrh w0, [x0, #0]
+ // cmp w0, w1
+ // >
+ // ldrsh w0, [x0, #0]
+ // cmn w0, #1
+ // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
+ // if and only if (sext LHS) == (sext RHS). The checks are in place to
+ // ensure both the LHS and RHS are truly zero extended and to make sure the
+ // transformation is profitable.
+ if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
+ cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
+ cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
+ LHS.getNode()->hasNUsesOfValue(1, 0)) {
+ int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
+ DAG.getValueType(MVT::i16));
+ Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
+ RHS.getValueType()),
+ CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
}
}
+
+ if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
+ if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
+ if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
+ AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+ }
+ }
+ }
+
+ if (!Cmp) {
+ Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC = changeIntCCToAArch64CC(CC);
}
- Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
- AArch64CC = changeIntCCToAArch64CC(CC);
- AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
+ AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
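
The i16 CMN rewrite above relies on the stated property that, for SETEQ/SETNE, comparing the zero-extended load against the 16-bit immediate is equivalent to comparing the sign-extended value against the sign-extended immediate. For the motivating #65535 vs cmn #1 case this is easy to verify standalone (plain C++ sketch):

#include <cassert>
#include <cstdint>

int main() {
  // For every 16-bit pattern, comparing the zero-extended value against
  // 0xFFFF (which needs a MOVZ + CMP) is equivalent to comparing the
  // sign-extended value against -1 (a single CMN #1), which is the
  // transformation performed above.
  for (uint32_t v = 0; v <= 0xFFFF; ++v) {
    uint16_t raw = static_cast<uint16_t>(v);
    bool zextEq = (static_cast<uint32_t>(raw) == 0xFFFFu);
    bool sextEq = (static_cast<int32_t>(static_cast<int16_t>(raw)) == -1);
    assert(zextEq == sextEq);
  }
  return 0;
}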
@@ -1391,8 +1659,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
RTLIB::Libcall Call) const {
SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
- return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
- SDLoc(Op)).first;
+ return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
}
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
@@ -1571,8 +1838,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
// precise. That doesn't take part in the LibCall so we can't directly use
// LowerF128Call.
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
@@ -1581,6 +1848,16 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
// in the cost tables.
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
+ unsigned NumElts = InVT.getVectorNumElements();
+
+ // f16 vectors are promoted to f32 before a conversion.
+ if (InVT.getVectorElementType() == MVT::f16) {
+ MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
+ SDLoc dl(Op);
+ return DAG.getNode(
+ Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
+ }
if (VT.getSizeInBits() < InVT.getSizeInBits()) {
SDLoc dl(Op);
@@ -1628,8 +1905,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
- return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
- SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
}
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
@@ -1931,6 +2207,31 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
+SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDLoc dl(Op);
+ switch (IntNo) {
+ default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::aarch64_thread_pointer: {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
+ }
+ case Intrinsic::aarch64_neon_smax:
+ return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_neon_umax:
+ return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_neon_smin:
+ return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_neon_umin:
+ return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+}
+
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2032,14 +2333,11 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFSINCOS(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return LowerINTRINSIC_WO_CHAIN(Op, DAG);
}
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const {
- return 2;
-}
-
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -2214,9 +2512,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- MemVT, false, false, false, 0);
+ ArgValue = DAG.getExtLoad(
+ ExtType, DL, VA.getLocVT(), Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ MemVT, false, false, false, 0);
InVals.push_back(ArgValue);
}
@@ -2289,9 +2588,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
- SDValue Store =
- DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 8), false, false, 0);
+ SDValue Store = DAG.getStore(
+ Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8), false,
+ false, 0);
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
@@ -2318,9 +2618,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
- SDValue Store =
- DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 16), false, false, 0);
+ SDValue Store = DAG.getStore(
+ Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16),
+ false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
@@ -2453,8 +2754,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
- if (!ArgLocs[i].isRegLoc())
+ for (const CCValAssign &ArgLoc : ArgLocs)
+ if (!ArgLoc.isRegLoc())
return false;
}
@@ -2758,7 +3059,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
- DstInfo = MachinePointerInfo::getFixedStack(FI);
+ DstInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
@@ -2768,7 +3070,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
- DstInfo = MachinePointerInfo::getStack(LocMemOffset);
+ DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
+ LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -2802,9 +3105,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
+ for (auto &RegToPass : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
+ RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
@@ -2860,9 +3163,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Add argument registers to the end of the list so that they are known live
// into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+ for (auto &RegToPass : RegsToPass)
+ Ops.push_back(DAG.getRegister(RegToPass.first,
+ RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
@@ -2968,6 +3271,19 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (AArch64::GPR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else if (AArch64::FPR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
RetOps[0] = Chain; // Update chain.
@@ -3010,11 +3326,12 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
unsigned char LoFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
SDValue Lo = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, LoFlags);
SDValue PoolAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
- SDValue GlobalAddr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), PoolAddr,
- MachinePointerInfo::getConstantPool(),
- /*isVolatile=*/ false,
- /*isNonTemporal=*/ true,
- /*isInvariant=*/ true, 8);
+ SDValue GlobalAddr = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), PoolAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ /*isVolatile=*/false,
+ /*isNonTemporal=*/true,
+ /*isInvariant=*/true, 8);
if (GN->getOffset() != 0)
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
DAG.getConstant(GN->getOffset(), DL, PtrVT));
@@ -3087,8 +3404,9 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet =
- DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(),
- false, true, true, 8);
+ DAG.getLoad(MVT::i64, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), false,
+ true, true, 8);
Chain = FuncTLVGet.getValue(1);
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -3160,6 +3478,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
@@ -3277,8 +3599,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
unsigned Opc = LHS.getOpcode();
- if (LHS.getResNo() == 1 && isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->isOne() &&
+ if (LHS.getResNo() == 1 && isOneConstant(RHS) &&
(Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
@@ -3392,17 +3713,11 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
- if (SrcVT != VT) {
- if (SrcVT == MVT::f32 && VT == MVT::f64)
- In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
- else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2,
- DAG.getIntPtrConstant(0, DL));
- else
- // FIXME: Src type is different, bail out for now. Can VT really be a
- // vector type?
- return SDValue();
- }
+
+ if (SrcVT.bitsLT(VT))
+ In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
+ else if (SrcVT.bitsGT(VT))
+ In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
EVT VecVT;
EVT EltVT;
@@ -3410,7 +3725,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue VecVal1, VecVal2;
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
EltVT = MVT::i32;
- VecVT = MVT::v4i32;
+ VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
EltMask = 0x80000000ULL;
if (!VT.isVector()) {
@@ -3571,32 +3886,6 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
}
-/// A SELECT_CC operation is really some kind of max or min if both values being
-/// compared are, in some sense, equal to the results in either case. However,
-/// it is permissible to compare f32 values and produce directly extended f64
-/// values.
-///
-/// Extending the comparison operands would also be allowed, but is less likely
-/// to happen in practice since their use is right here. Note that truncate
-/// operations would *not* be semantically equivalent.
-static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
- if (Cmp == Result)
- return (Cmp.getValueType() == MVT::f32 ||
- Cmp.getValueType() == MVT::f64);
-
- ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
- ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
- if (CCmp && CResult && Cmp.getValueType() == MVT::f32 &&
- Result.getValueType() == MVT::f64) {
- bool Lossy;
- APFloat CmpVal = CCmp->getValueAPF();
- CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy);
- return CResult->getValueAPF().bitwiseIsEqual(CmpVal);
- }
-
- return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
-}
-
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue RHS, SDValue TVal,
SDValue FVal, SDLoc dl,
@@ -3614,7 +3903,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
}
}
- // Handle integers first.
+ // Also handle f16, for which we need to do a f32 comparison.
+ if (LHS.getValueType() == MVT::f16) {
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
+ RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
+ }
+
+ // Next, handle integers.
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
@@ -3637,9 +3932,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
-
- if (CVal && CVal->isAllOnesValue()) {
+ if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, true);
@@ -3647,9 +3940,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
// that we can match with a CSNEG rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
-
- if (CVal && CVal->isNullValue()) {
+ if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, true);
@@ -4109,46 +4400,57 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMcc;
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+
+ // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
+ // is "undef". We wanted 0, so CSEL it directly.
+ SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
+ ISD::SETEQ, dl, DAG);
+ SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
+ HiBitsForLo =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
+ HiBitsForLo, CCVal, Cmp);
+
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
- ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+ SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue LoForNormalShift =
+ DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
- SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
- SDValue Lo =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+ Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
+ dl, DAG);
+ CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+ SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+ SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
+ LoForNormalShift, CCVal, Cmp);
// AArch64 shifts larger than the register width are wrapped rather than
// clamped, so we can't just emit "hi >> x".
- SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue TrueValHi = Opc == ISD::SRA
- ? DAG.getNode(Opc, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, dl,
- MVT::i64))
- : DAG.getConstant(0, dl, VT);
- SDValue Hi =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
+ SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue HiForBigShift =
+ Opc == ISD::SRA
+ ? DAG.getNode(Opc, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, dl, MVT::i64))
+ : DAG.getConstant(0, dl, VT);
+ SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
+ HiForNormalShift, CCVal, Cmp);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
}
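
The SRL_PARTS lowering above is easiest to review against a plain 64+64-bit model. The sketch below (my own helper name, unsigned shift only) keeps the same ingredients: the ShAmt == 0 guard for the otherwise-undefined hi << 64, the normal-shift OR, and the big-shift arm selected when ShAmt >= 64; the two CSELs are written as branches to keep the C++ well-defined:

#include <cassert>
#include <cstdint>

// Logical shift right of the 128-bit value hi:lo by amt (0 <= amt < 128),
// built from 64-bit operations, mirroring LowerShiftRightParts for ISD::SRL.
static void lsr128(uint64_t hi, uint64_t lo, unsigned amt,
                   uint64_t &outHi, uint64_t &outLo) {
  if (amt >= 64) {                        // "big shift" CSEL arm (GE case)
    outLo = hi >> (amt - 64);
    outHi = 0;                            // SRA would use hi >> 63 here
    return;
  }
  // hi << (64 - amt) is undefined when amt == 0; the DAG code CSELs in a 0
  // for exactly that case, which is what this ternary models.
  uint64_t hiBitsForLo = (amt == 0) ? 0 : (hi << (64 - amt));
  outLo = (lo >> amt) | hiBitsForLo;      // "LoForNormalShift"
  outHi = hi >> amt;                      // "HiForNormalShift"
}

int main() {
  uint64_t hi, lo;
  lsr128(0x0123456789ABCDEFull, 0xFEDCBA9876543210ull, 0, hi, lo);
  assert(hi == 0x0123456789ABCDEFull && lo == 0xFEDCBA9876543210ull);
  lsr128(0x0123456789ABCDEFull, 0xFEDCBA9876543210ull, 4, hi, lo);
  assert(hi == 0x00123456789ABCDEull && lo == 0xFFEDCBA987654321ull);
  lsr128(0x0123456789ABCDEFull, 0xFEDCBA9876543210ull, 68, hi, lo);
  assert(hi == 0 && lo == 0x00123456789ABCDEull);
  return 0;
}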
+
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i64 values and take a 2 x i64 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -4156,31 +4458,41 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMcc;
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+
+ // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
+ // is "undef". We wanted 0, so CSEL it directly.
+ SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
+ ISD::SETEQ, dl, DAG);
+ SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
+ LoBitsForHi =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
+ LoBitsForHi, CCVal, Cmp);
+
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
- SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+ SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue HiForNormalShift =
+ DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
- SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
- ISD::SETGE, dl, DAG);
- SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
- SDValue Hi =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
+ Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
+ dl, DAG);
+ CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+ SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
+ HiForNormalShift, CCVal, Cmp);
// AArch64 shifts of larger than register sizes are wrapped rather than
// clamped, so we can't just emit "lo << a" if a is too big.
- SDValue TrueValLo = DAG.getConstant(0, dl, VT);
- SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Lo =
- DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+ SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
+ SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
+ LoForNormalShift, CCVal, Cmp);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -4362,8 +4674,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
// Validate and return a target constant for them if we can.
case 'z': {
// 'z' maps to xzr or wzr so it needs an input of 0.
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C || C->getZExtValue() != 0)
+ if (!isNullConstant(Op))
return;
if (Op.getValueType() == MVT::i64)
@@ -5653,11 +5964,10 @@ static SDValue NormalizeBuildVector(SDValue Op,
return Op;
SmallVector<SDValue, 16> Ops;
- for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
- SDValue Lane = Op.getOperand(I);
- if (Lane.getOpcode() == ISD::Constant) {
+ for (SDValue Lane : Op->ops()) {
+ if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
- cast<ConstantSDNode>(Lane)->getZExtValue());
+ CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
}
Ops.push_back(Lane);
@@ -5997,8 +6307,7 @@ FailedModImm:
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
- SDValue shuffle = ReconstructShuffle(Op, DAG);
- if (shuffle != SDValue())
+ if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
}
@@ -6017,7 +6326,10 @@ FailedModImm:
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
// value is already in an S or D register.
- if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) {
+ // Do not do this for UNDEF/LOAD nodes because we have better patterns
+ // for those, which avoid the SCALAR_TO_VECTOR/BUILD_VECTOR.
+ if (Op0.getOpcode() != ISD::UNDEF && Op0.getOpcode() != ISD::LOAD &&
+ (ElemSize == 32 || ElemSize == 64)) {
unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
MachineSDNode *N =
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
@@ -6123,24 +6435,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
unsigned Val = Cst->getZExtValue();
unsigned Size = Op.getValueType().getSizeInBits();
- if (Val == 0) {
- switch (Size) {
- case 8:
- return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 16:
- return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 32:
- return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 64:
- return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(),
- Op.getOperand(0));
- default:
- llvm_unreachable("Unexpected vector type in extract_subvector!");
- }
- }
+
+ // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
+ if (Val == 0)
+ return Op;
+
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64)
@@ -6213,26 +6512,20 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift right operation. For a shift opcode, the value
-/// is positive, but for an intrinsic the value count must be negative. The
-/// absolute value must be in the range:
-/// 1 <= |Value| <= ElementBits for a right shift; or
-/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
- int64_t &Cnt) {
+/// operand of a vector shift right operation. The value must be in the range:
+/// 1 <= Value <= ElementBits for a right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
- if (isIntrinsic)
- Cnt = -Cnt;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
@@ -6261,8 +6554,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
case ISD::SRA:
case ISD::SRL:
// Right shift immediate
- if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) &&
- Cnt < EltSize) {
+ if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
@@ -6451,7 +6743,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
Info.offset = 0;
@@ -6477,7 +6769,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
@@ -6720,10 +7012,10 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VecTy = Shuffles[0]->getType();
- unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- // Skip illegal vector types.
- if (VecSize != 64 && VecSize != 128)
+ // Skip if we do not have NEON and skip illegal vector types.
+ if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128))
return false;
// A pointer vector can not be the return type of the ldN intrinsics. Need to
@@ -6806,10 +7098,10 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- // Skip illegal vector types.
- if (SubVecSize != 64 && SubVecSize != 128)
+ // Skip if we do not have NEON and skip illegal vector types.
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128))
return false;
Value *Op0 = SVI->getOperand(0);
@@ -7228,8 +7520,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
- SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
- if (Res != SDValue())
+ if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
EVT VT = N->getValueType(0);
@@ -7242,7 +7533,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
// If the result of an integer load is only used by an integer-to-float
// conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead.
- // This eliminates an "integer-to-vector-move UOP and improve throughput.
+ // This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
@@ -7265,6 +7556,134 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Fold a floating-point multiply by power of two into floating-point to
+/// fixed-point conversion.
+static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue Op = N->getOperand(0);
+ if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ SDValue ConstVec = Op->getOperand(1);
+ if (!isa<BuildVectorSDNode>(ConstVec))
+ return SDValue();
+
+ MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
+ if (FloatBits != 32 && FloatBits != 64)
+ return SDValue();
+
+ MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t IntBits = IntTy.getSizeInBits();
+ if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+ return SDValue();
+
+ // Avoid conversions where iN is larger than the float (e.g., float -> i64).
+ if (IntBits > FloatBits)
+ return SDValue();
+
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t Bits = IntBits == 64 ? 64 : 32;
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
+ if (C == -1 || C == 0 || C > Bits)
+ return SDValue();
+
+ MVT ResTy;
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ switch (NumLanes) {
+ default:
+ return SDValue();
+ case 2:
+ ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
+ break;
+ case 4:
+ ResTy = MVT::v4i32;
+ break;
+ }
+
+ SDLoc DL(N);
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
+ : Intrinsic::aarch64_neon_vcvtfp2fxu;
+ SDValue FixConv =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
+ DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
+ Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
+ // We can handle smaller integers by generating an extra trunc.
+ if (IntBits < FloatBits)
+ FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
+
+ return FixConv;
+}
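
What makes this combine legal is that multiplying a float by a power of two is exact (only the exponent changes), so fptosi(fmul x, 2^C) computes the same value as a single fixed-point conversion with C fractional bits, which is what the vcvtfp2fxs/vcvtfp2fxu intrinsics select to. A scalar sanity check of that exactness claim, ignoring overflow/saturation edge cases:

#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  const float vals[] = {0.0f, 1.5f, -2.75f, 3.14159f, -0.125f, 123.456f};
  for (float x : vals) {
    for (int c = 1; c <= 20; ++c) {
      // Scaling by 2^c in float introduces no rounding, so the float product
      // equals the mathematically exact product (computed here in double)...
      double exact = static_cast<double>(x) * std::ldexp(1.0, c);
      float scaled = x * std::ldexp(1.0f, c);
      assert(static_cast<double>(scaled) == exact);
      // ...and truncating it gives the same integer the fixed-point
      // conversion would produce.
      assert(static_cast<int32_t>(scaled) == static_cast<int32_t>(exact));
    }
  }
  return 0;
}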
+
+/// Fold a floating-point divide by power of two into fixed-point to
+/// floating-point conversion.
+static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue Op = N->getOperand(0);
+ unsigned Opc = Op->getOpcode();
+ if (!Op.getValueType().isVector() ||
+ (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
+ return SDValue();
+
+ SDValue ConstVec = N->getOperand(1);
+ if (!isa<BuildVectorSDNode>(ConstVec))
+ return SDValue();
+
+ MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+ int32_t IntBits = IntTy.getSizeInBits();
+ if (IntBits != 16 && IntBits != 32 && IntBits != 64)
+ return SDValue();
+
+ MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ int32_t FloatBits = FloatTy.getSizeInBits();
+ if (FloatBits != 32 && FloatBits != 64)
+ return SDValue();
+
+ // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
+ if (IntBits > FloatBits)
+ return SDValue();
+
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
+ if (C == -1 || C == 0 || C > FloatBits)
+ return SDValue();
+
+ MVT ResTy;
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ switch (NumLanes) {
+ default:
+ return SDValue();
+ case 2:
+ ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
+ break;
+ case 4:
+ ResTy = MVT::v4i32;
+ break;
+ }
+
+ SDLoc DL(N);
+ SDValue ConvInput = Op.getOperand(0);
+ bool IsSigned = Opc == ISD::SINT_TO_FP;
+ if (IntBits < FloatBits)
+ ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
+ ResTy, ConvInput);
+
+ unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
+ : Intrinsic::aarch64_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
+ DAG.getConstant(C, DL, MVT::i32));
+}
+
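For the converse direction, a hedged sketch of what performFDivCombine looks for (illustrative name, not from the patch): an int-to-float conversion divided by a power of two, which can become a single scvtf with a fractional-bits operand. Whether this exact combine fires may depend on earlier canonicalization of the exact fdiv into an fmul by the reciprocal.

#include <arm_neon.h>

// Sketch: (fdiv (sint_to_fp x), 2^4) as matched above.
float32x4_t fixed_to_float(int32x4_t v) {
  return vdivq_f32(vcvtq_f32_s32(v), vdupq_n_f32(16.0f));
}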
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
@@ -7964,7 +8383,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
- break;
case Intrinsic::aarch64_neon_saddv:
return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
case Intrinsic::aarch64_neon_uaddv:
@@ -7978,10 +8396,16 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
- return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
- return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_fmaxnm:
+ return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_fminnm:
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
@@ -8141,7 +8565,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
unsigned Alignment = std::min(OrigAlignment, EltOffset);
// Create scalar stores. This is at least as good as the code sequence for a
- // split unaligned store wich is a dup.s, ext.b, and two stores.
+ // split unaligned store which is a dup.s, ext.b, and two stores.
// Most of the time the three stores should be replaced by store pair
// instructions (stp).
SDLoc DL(St);
@@ -8162,10 +8586,9 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
return NewST1;
}
-static SDValue performSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG,
- const AArch64Subtarget *Subtarget) {
+static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
if (!DCI.isBeforeLegalize())
return SDValue();
@@ -8173,15 +8596,17 @@ static SDValue performSTORECombine(SDNode *N,
if (S->isVolatile())
return SDValue();
+ // FIXME: The logic for deciding if an unaligned store should be split should
+ // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
+ // a call to that function here.
+
// Cyclone has bad performance on unaligned 16B stores when crossing line and
// page boundaries. We want to split such stores.
if (!Subtarget->isCyclone())
return SDValue();
- // Don't split at Oz.
- MachineFunction &MF = DAG.getMachineFunction();
- bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
- if (IsMinSize)
+ // Don't split at -Oz.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
SDValue StVal = S->getValue();
@@ -8204,8 +8629,7 @@ static SDValue performSTORECombine(SDNode *N,
// If we get a splat of a scalar convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
- SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
- if (ReplacedSplat != SDValue())
+ if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S))
return ReplacedSplat;
SDLoc DL(S);
@@ -8326,6 +8750,299 @@ static SDValue performPostLD1Combine(SDNode *N,
return SDValue();
}
+/// Simplify \p Addr given that the top byte of it is ignored by HW during
+/// address translation.
+static bool performTBISimplification(SDValue Addr,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ APInt DemandedMask = APInt::getLowBitsSet(64, 56);
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+ DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ return true;
+ }
+ return false;
+}
+
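A minimal sketch (not part of the patch; names are illustrative) of what the TBI simplification enables: an AND that only clears the tag byte of an address feeding a memory access demands no bits the hardware translates, so SimplifyDemandedBits can drop it on subtargets that report supportsAddressTopByteIgnored().

#include <cstdint>

// Assumed illustration: p carries a tag in bits 63:56. With top-byte-ignore,
// the masking below can be simplified away before the load.
int load_tagged(const int *p) {
  std::uintptr_t clean =
      reinterpret_cast<std::uintptr_t>(p) & 0x00ffffffffffffffULL;
  return *reinterpret_cast<const int *>(clean);
}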
+static SDValue performSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ SDValue Split = split16BStores(N, DCI, DAG, Subtarget);
+ if (Split.getNode())
+ return Split;
+
+ if (Subtarget->supportsAddressTopByteIgnored() &&
+ performTBISimplification(N->getOperand(2), DCI, DAG))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+/// This function handles the log2-shuffle pattern produced by the
+/// LoopVectorizer for the across vector reduction. It consists of
+/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
+/// are reduced, where s is an induction variable from 0 to
+/// log2(NumVectorElements).
+static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
+ unsigned Op,
+ SelectionDAG &DAG) {
+ EVT VTy = OpV->getOperand(0).getValueType();
+ if (!VTy.isVector())
+ return SDValue();
+
+ int NumVecElts = VTy.getVectorNumElements();
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+ if (NumVecElts != 4)
+ return SDValue();
+ } else {
+ if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
+ return SDValue();
+ }
+
+ int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
+ SDValue PreOp = OpV;
+ // Iterate over each step of the across vector reduction.
+ for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
+ SDValue CurOp = PreOp.getOperand(0);
+ SDValue Shuffle = PreOp.getOperand(1);
+ if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
+ // Try to swap the 1st and 2nd operands, as add and min/max instructions
+ // are commutative.
+ CurOp = PreOp.getOperand(1);
+ Shuffle = PreOp.getOperand(0);
+ if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
+ return SDValue();
+ }
+
+ // Check that the input vector is fed by the operator we want to handle,
+ // except in the last step; the very first input vector is not necessarily
+ // produced by the same operator we are handling.
+ if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
+ return SDValue();
+
+ // Check if it forms one step of the across vector reduction.
+ // E.g.,
+ // %cur = add %1, %0
+ // %shuffle = vector_shuffle %cur, <2, 3, u, u>
+ // %pre = add %cur, %shuffle
+ if (Shuffle.getOperand(0) != CurOp)
+ return SDValue();
+
+ int NumMaskElts = 1 << CurStep;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
+ // Check mask values in each step.
+ // We expect the shuffle mask in each step follows a specific pattern
+ // denoted here by the <M, U> form, where M is a sequence of integers
+ // starting from NumMaskElts, increasing by 1, and the number of integers
+ // in M should be NumMaskElts. U is a sequence of UNDEFs and the number
+ // of UNDEFs in U should be NumVecElts - NumMaskElts.
+ // E.g., for <8 x i16>, mask values in each step should be :
+ // step 0 : <1,u,u,u,u,u,u,u>
+ // step 1 : <2,3,u,u,u,u,u,u>
+ // step 2 : <4,5,6,7,u,u,u,u>
+ for (int i = 0; i < NumVecElts; ++i)
+ if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
+ (i >= NumMaskElts && !(Mask[i] < 0)))
+ return SDValue();
+
+ PreOp = CurOp;
+ }
+ unsigned Opcode;
+ bool IsIntrinsic = false;
+
+ switch (Op) {
+ default:
+ llvm_unreachable("Unexpected operator for across vector reduction");
+ case ISD::ADD:
+ Opcode = AArch64ISD::UADDV;
+ break;
+ case ISD::SMAX:
+ Opcode = AArch64ISD::SMAXV;
+ break;
+ case ISD::UMAX:
+ Opcode = AArch64ISD::UMAXV;
+ break;
+ case ISD::SMIN:
+ Opcode = AArch64ISD::SMINV;
+ break;
+ case ISD::UMIN:
+ Opcode = AArch64ISD::UMINV;
+ break;
+ case ISD::FMAXNUM:
+ Opcode = Intrinsic::aarch64_neon_fmaxnmv;
+ IsIntrinsic = true;
+ break;
+ case ISD::FMINNUM:
+ Opcode = Intrinsic::aarch64_neon_fminnmv;
+ IsIntrinsic = true;
+ break;
+ }
+ SDLoc DL(N);
+
+ return IsIntrinsic
+ ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+ DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
+ : DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
+ DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
+ DAG.getConstant(0, DL, MVT::i64));
+}
+
+/// Target-specific DAG combine for the across vector min/max reductions.
+/// This function specifically handles the final clean-up step of the vector
+/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
+/// pattern, which narrows down and finds the final min/max value from all
+/// elements of the vector.
+/// For example, for a <16 x i8> vector :
+/// %svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
+/// %smax0 = smax %0, %svn0
+/// %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
+/// %smax1 = smax %smax0, %svn1
+/// %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+/// %smax2 = smax %smax1, %svn2
+/// %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+/// %sc = setcc %smax2, %svn3, gt
+/// %n0 = extract_vector_elt %sc, #0
+/// %n1 = extract_vector_elt %smax2, #0
+/// %n2 = extract_vector_elt %smax2, #1
+/// %result = select %n0, %n1, %n2
+/// becomes :
+/// %1 = smaxv %0
+/// %result = extract_vector_elt %1, 0
+static SDValue
+performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue IfTrue = N->getOperand(1);
+ SDValue IfFalse = N->getOperand(2);
+
+ // Check if the SELECT merges up the final result of the min/max
+ // from a vector.
+ if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ // Expect N0 is fed by SETCC.
+ SDValue SetCC = N0.getOperand(0);
+ EVT SetCCVT = SetCC.getValueType();
+ if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() ||
+ SetCCVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ SDValue VectorOp = SetCC.getOperand(0);
+ unsigned Op = VectorOp->getOpcode();
+ // Check if the input vector is fed by the operator we want to handle.
+ if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
+ Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
+ return SDValue();
+
+ EVT VTy = VectorOp.getValueType();
+ if (!VTy.isVector())
+ return SDValue();
+
+ if (VTy.getSizeInBits() < 64)
+ return SDValue();
+
+ EVT EltTy = VTy.getVectorElementType();
+ if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+ if (EltTy != MVT::f32)
+ return SDValue();
+ } else {
+ if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+ return SDValue();
+ }
+
+ // Check if extracting from the same vector.
+ // For example,
+ // %sc = setcc %vector, %svn1, gt
+ // %n0 = extract_vector_elt %sc, #0
+ // %n1 = extract_vector_elt %vector, #0
+ // %n2 = extract_vector_elt %vector, #1
+ if (!(VectorOp == IfTrue->getOperand(0) &&
+ VectorOp == IfFalse->getOperand(0)))
+ return SDValue();
+
+ // Check if the condition code is matched with the operator type.
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
+ (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
+ (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
+ (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
+ (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
+ CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
+ CC != ISD::SETGE) ||
+ (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
+ CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
+ CC != ISD::SETLE))
+ return SDValue();
+
+ // Expect to check only lane 0 from the vector SETCC.
+ if (!isNullConstant(N0.getOperand(1)))
+ return SDValue();
+
+ // Expect to extract the true value from lane 0.
+ if (!isNullConstant(IfTrue.getOperand(1)))
+ return SDValue();
+
+ // Expect to extract the false value from lane 1.
+ if (!isOneConstant(IfFalse.getOperand(1)))
+ return SDValue();
+
+ return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
+}
+
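An illustrative source-level reduction (not from the patch; the function name is arbitrary) that, once vectorized, produces the select/setcc clean-up pattern matched above and can end in a single smaxv:

// Illustrative only: the vectorized epilogue of this max reduction is the
// shape performAcrossLaneMinMaxReductionCombine recognizes.
int max16(const int *a) {
  int m = a[0];
  for (int i = 1; i < 16; ++i)
    m = a[i] > m ? a[i] : m;
  return m;
}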
+/// Target-specific DAG combine for the across vector add reduction.
+/// This function specifically handles the final clean-up step of the vector
+/// add reduction produced by the LoopVectorizer. It is the log2-shuffle
+/// pattern, which adds all elements of a vector together.
+/// For example, for a <4 x i32> vector :
+/// %1 = vector_shuffle %0, <2,3,u,u>
+/// %2 = add %0, %1
+/// %3 = vector_shuffle %2, <1,u,u,u>
+/// %4 = add %2, %3
+/// %result = extract_vector_elt %4, 0
+/// becomes :
+/// %5 = uaddv %0
+/// %result = extract_vector_elt %5, 0
+static SDValue
+performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!Subtarget->hasNEON())
+ return SDValue();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Check if the input vector is fed by the ADD.
+ if (N0->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // The vector extract index must be constant zero because we only expect the
+ // final result of the reduction to be placed in lane 0.
+ if (!isNullConstant(N1))
+ return SDValue();
+
+ EVT VTy = N0.getValueType();
+ if (!VTy.isVector())
+ return SDValue();
+
+ EVT EltTy = VTy.getVectorElementType();
+ if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+ return SDValue();
+
+ if (VTy.getSizeInBits() < 64)
+ return SDValue();
+
+ return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
+}
+
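Similarly, a hedged example (illustrative name, not from the patch) of a sum whose vectorized epilogue is the extract_vector_elt-of-add shape matched above, ending in addv:

// Illustrative only; exact codegen depends on the vectorizer's choices.
int sum16(const int *a) {
  int s = 0;
  for (int i = 0; i < 16; ++i)
    s += a[i];
  return s;
}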
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -8751,10 +9468,10 @@ static SDValue performBRCONDCombine(SDNode *N,
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return SDValue();
- if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
+ if (isNullConstant(LHS))
std::swap(LHS, RHS);
- if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
+ if (!isNullConstant(RHS))
return SDValue();
if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
@@ -8868,75 +9585,6 @@ static SDValue performSelectCombine(SDNode *N,
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
-/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
-/// to match FMIN/FMAX patterns.
-static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
- // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
- // Unless the NoNaNsFPMath option is set, be careful about NaNs:
- // vmax/vmin return NaN if either operand is a NaN;
- // only do the transformation when it matches that behavior.
-
- SDValue CondLHS = N->getOperand(0);
- SDValue CondRHS = N->getOperand(1);
- SDValue LHS = N->getOperand(2);
- SDValue RHS = N->getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
- unsigned Opcode;
- bool IsReversed;
- if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
- selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
- IsReversed = false; // x CC y ? x : y
- } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
- selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
- IsReversed = true ; // x CC y ? y : x
- } else {
- return SDValue();
- }
-
- bool IsUnordered = false, IsOrEqual;
- switch (CC) {
- default:
- return SDValue();
- case ISD::SETULT:
- case ISD::SETULE:
- IsUnordered = true;
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETLT:
- case ISD::SETLE:
- IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
- Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
- break;
-
- case ISD::SETUGT:
- case ISD::SETUGE:
- IsUnordered = true;
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETGT:
- case ISD::SETGE:
- IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
- Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
- break;
- }
-
- // If LHS is NaN, an ordered comparison will be false and the result will be
- // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
- // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- return SDValue();
-
- // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
- // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
- // used for unsafe math or if one of the operands is known to be nonzero.
- if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- return SDValue();
-
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
-}
-
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
@@ -8961,6 +9609,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return performFpToIntCombine(N, DAG, Subtarget);
+ case ISD::FDIV:
+ return performFDivCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
@@ -8973,12 +9626,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performBitcastCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
- case ISD::SELECT:
- return performSelectCombine(N, DCI);
+ case ISD::SELECT: {
+ SDValue RV = performSelectCombine(N, DCI);
+ if (!RV.getNode())
+ RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
+ return RV;
+ }
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
- case ISD::SELECT_CC:
- return performSelectCCCombine(N, DCI.DAG);
+ case ISD::LOAD:
+ if (performTBISimplification(N->getOperand(1), DCI, DAG))
+ return SDValue(N, 0);
+ break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case AArch64ISD::BRCOND:
@@ -8991,6 +9650,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performNVCASTCombine(N);
case ISD::INSERT_VECTOR_ELT:
return performPostLD1Combine(N, DCI, true);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -9157,6 +9818,20 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
+static void ReplaceReductionResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, unsigned InterOp,
+ unsigned AcrossOp) {
+ EVT LoVT, HiVT;
+ SDValue Lo, Hi;
+ SDLoc dl(N);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+ SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
+ SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
+ Results.push_back(SplitVal);
+}
+
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
@@ -9165,6 +9840,24 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
+ case AArch64ISD::SADDV:
+ ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
+ return;
+ case AArch64ISD::UADDV:
+ ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
+ return;
+ case AArch64ISD::SMINV:
+ ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
+ return;
+ case AArch64ISD::UMINV:
+ ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
+ return;
+ case AArch64ISD::SMAXV:
+ ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
+ return;
+ case AArch64ISD::UMAXV:
+ ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
+ return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
@@ -9177,10 +9870,10 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
return true;
}
-bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
- return NumUsers > 2;
+ return 3;
}
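For illustration (not part of the patch; the function name is arbitrary): with three or more divisions by the same value and unsafe FP math enabled, the generic combiner consults this hook and rewrites the divisions as one reciprocal plus multiplies.

// Sketch, assuming -ffast-math (the reciprocal rewrite is not exact):
// d is inverted once and the three fdivs become fmuls.
void scale3(float *a, float d) {
  a[0] /= d;
  a[1] /= d;
  a[2] /= d;
}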
TargetLoweringBase::LegalizeTypeAction
@@ -9206,20 +9899,21 @@ bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
-bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return Size == 128;
+ return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128 ? AtomicRMWExpansionKind::LLSC
- : AtomicRMWExpansionKind::None;
+ return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
}
-bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
+bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
return true;
}
@@ -9258,6 +9952,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
cast<PointerType>(Addr->getType())->getElementType());
}
+void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+ IRBuilder<> &Builder) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Builder.CreateCall(
+ llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+}
+
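A small usage sketch (not from the patch; names are arbitrary): a C++ compare-exchange expands to an ldaxr/stlxr loop, and the failure path that performs no store is balanced with clrex by the hook above.

#include <atomic>

// Illustrative only.
bool cas(std::atomic<int> &v, int expected, int desired) {
  return v.compare_exchange_strong(expected, desired);
}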
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
@@ -9294,3 +9995,70 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
return Ty->isArrayTy();
}
+
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+ EVT) const {
+ return false;
+}
+
+Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (!Subtarget->isTargetAndroid())
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+
+ // Android provides a fixed TLS slot for the SafeStack pointer. See the
+ // definition of TLS_SLOT_SAFESTACK in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ const unsigned TlsOffset = 0x48;
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Function *ThreadPointerFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::aarch64_thread_pointer);
+ return IRB.CreatePointerCast(
+ IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
+ Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+}
+
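A hypothetical sketch (not part of the patch) of the equivalent source-level access on Android/AArch64, assuming the compiler's __builtin_thread_pointer builtin: the unsafe-stack pointer lives at the fixed bionic TLS slot at offset 0x48 from the thread pointer.

// Hypothetical illustration only.
static inline void *unsafe_stack_pointer() {
  char *tp = static_cast<char *>(__builtin_thread_pointer());
  return *reinterpret_cast<void **>(tp + 0x48); // TLS_SLOT_SAFESTACK
}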
+void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ // Update IsSplitCSR in AArch64FunctionInfo.
+ AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void AArch64TargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (AArch64::GPR64RegClass.contains(*I))
+ RC = &AArch64::GPR64RegClass;
+ else if (AArch64::FPR64RegClass.contains(*I))
+ RC = &AArch64::FPR64RegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions; it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index c73ce1e..e99616c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
+#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
@@ -58,13 +59,14 @@ enum NodeType : unsigned {
SBCS,
ANDS,
+ // Conditional compares. Operands: left,right,falsecc,cc,flags
+ CCMP,
+ CCMN,
+ FCCMP,
+
// Floating point comparison
FCMP,
- // Floating point max and min instructions.
- FMAX,
- FMIN,
-
// Scalar extract
EXTR,
@@ -217,8 +219,6 @@ class AArch64Subtarget;
class AArch64TargetMachine;
class AArch64TargetLowering : public TargetLowering {
- bool RequireStrictAlign;
-
public:
explicit AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI);
@@ -226,46 +226,35 @@ public:
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
- /// computeKnownBitsForTargetNode - Determine which of the bits specified in
- /// Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
+ /// Determine which of the bits specified in Mask are known to be either zero
+ /// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
APInt &KnownOne, const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
- /// allowsMisalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses of the specified type.
+ /// Returns true if the target allows unaligned memory accesses of the
+ /// specified type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
unsigned Align = 1,
- bool *Fast = nullptr) const override {
- if (RequireStrictAlign)
- return false;
- // FIXME: True for Cyclone, but not necessary others.
- if (Fast)
- *Fast = true;
- return true;
- }
+ bool *Fast = nullptr) const override;
- /// LowerOperation - Provide custom lowering hooks for some operations.
+ /// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- unsigned getFunctionAlignment(const Function *F) const;
-
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Addrspacecasts are always noops.
return true;
}
- /// createFastISel - This method returns a target specific FastISel object,
- /// or null if the target does not support "fast" ISel.
+ /// This method returns a target specific FastISel object, or null if the
+ /// target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
@@ -273,11 +262,11 @@ public:
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
- /// isShuffleMaskLegal - Return true if the given shuffle mask can be
- /// codegen'd directly, or if it should be stack expanded.
+ /// Return true if the given shuffle mask can be codegen'd directly, or if it
+ /// should be stack expanded.
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
- /// getSetCCResultType - Return the ISD::SETCC ValueType
+ /// Return the ISD::SETCC ValueType.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -322,8 +311,8 @@ public:
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const override;
- /// isLegalAddressingMode - Return true if the addressing mode represented
- /// by AM is legal for this target, for a load/store of the specified type.
+ /// Return true if the addressing mode represented by AM is legal for this
+ /// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
@@ -335,10 +324,9 @@ public:
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
+ /// Return true if an FMA operation is faster than a pair of fmul and fadd
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
+ /// returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
@@ -351,25 +339,65 @@ public:
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
- bool hasLoadLinkedStoreConditional() const override;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- TargetLoweringBase::AtomicRMWExpansionKind
+ TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
+ /// If the target has a standard location for the unsafe stack pointer,
+ /// returns the address of that location. Otherwise, returns nullptr.
+ Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ // FIXME: This is a guess. Has this been defined yet?
+ return AArch64::X0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ // FIXME: This is a guess. Has this been defined yet?
+ return AArch64::X1;
+ }
+
+ bool isCheapToSpeculateCttz() const override {
+ return true;
+ }
+
+ bool isCheapToSpeculateCtlz() const override {
+ return true;
+ }
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
@@ -392,6 +420,8 @@ private:
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+
bool isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
bool isCalleeStructRet, bool isCallerStructRet,
@@ -470,7 +500,7 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
@@ -516,6 +546,8 @@ private:
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
CallingConv::ID CallConv,
bool isVarArg) const override;
+
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
};
namespace AArch64 {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 3f2e772..6ac2175 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -248,6 +248,12 @@ def simm7s16 : Operand<i32> {
let PrintMethod = "printImmScale<16>";
}
+def am_indexed7s8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
+def am_indexed7s16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
+def am_indexed7s32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
+def am_indexed7s64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S64", []>;
+def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
+
class AsmImmRange<int Low, int High> : AsmOperandClass {
let Name = "Imm" # Low # "_" # High;
let DiagnosticType = "InvalidImm" # Low # "_" # High;
@@ -346,9 +352,11 @@ class fixedpoint_i64<ValueType FloatVT>
let ParserMatchClass = Imm1_64Operand;
}
+def fixedpoint_f16_i32 : fixedpoint_i32<f16>;
def fixedpoint_f32_i32 : fixedpoint_i32<f32>;
def fixedpoint_f64_i32 : fixedpoint_i32<f64>;
+def fixedpoint_f16_i64 : fixedpoint_i64<f16>;
def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
@@ -402,6 +410,7 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
+def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_7Operand : AsmImmRange<0, 7>;
def Imm0_15Operand : AsmImmRange<0, 15>;
def Imm0_31Operand : AsmImmRange<0, 31>;
@@ -525,6 +534,20 @@ def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_31Operand;
}
+// True if the 32-bit immediate is in the range [0,31]
+def imm32_0_31 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint64_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
+}
+
+// imm0_1 predicate - True if the immediate is in the range [0,1]
+def imm0_1 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+}
+
// imm0_15 predicate - True if the immediate is in the range [0,15]
def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
return ((uint64_t)Imm) < 16;
@@ -542,7 +565,9 @@ def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
// imm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
def imm32_0_15 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 16;
-}]>;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+}
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
@@ -690,6 +715,17 @@ class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
}
// Floating-point immediate.
+def fpimm16 : Operand<f16>,
+ PatLeaf<(f16 fpimm), [{
+ return AArch64_AM::getFP16Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::getFP16Imm(InVal);
+ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32);
+ }]>> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
def fpimm32 : Operand<f32>,
PatLeaf<(f32 fpimm), [{
return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1;
@@ -822,7 +858,7 @@ class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
// model patterns with sufficiently fine granularity
let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in
class HintI<string mnemonic>
- : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "",
+ : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "",
[(int_aarch64_hint imm0_127:$imm)]>,
Sched<[WriteHint]> {
bits <7> imm;
@@ -875,6 +911,25 @@ def msr_sysreg_op : Operand<i32> {
let PrintMethod = "printMSRSystemRegister";
}
+def PSBHintOperand : AsmOperandClass {
+ let Name = "PSBHint";
+ let ParserMethod = "tryParsePSBHint";
+}
+def psbhint_op : Operand<i32> {
+ let ParserMatchClass = PSBHintOperand;
+ let PrintMethod = "printPSBHintOp";
+ let MCOperandPredicate = [{
+ // Check if the operand is valid, to fix exhaustive aliasing in disassembly.
+ // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields.
+ if (!MCOp.isImm())
+ return false;
+ bool ValidNamed;
+ (void)AArch64PSBHint::PSBHintMapper().toString(MCOp.getImm(),
+ STI.getFeatureBits(), ValidNamed);
+ return ValidNamed;
+ }];
+}
+
class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
"mrs", "\t$Rt, $systemreg"> {
bits<16> systemreg;
@@ -890,19 +945,19 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
let Inst{20-5} = systemreg;
}
-def SystemPStateFieldOperand : AsmOperandClass {
- let Name = "SystemPStateField";
+def SystemPStateFieldWithImm0_15Operand : AsmOperandClass {
+ let Name = "SystemPStateFieldWithImm0_15";
let ParserMethod = "tryParseSysReg";
}
-def pstatefield_op : Operand<i32> {
- let ParserMatchClass = SystemPStateFieldOperand;
+def pstatefield4_op : Operand<i32> {
+ let ParserMatchClass = SystemPStateFieldWithImm0_15Operand;
let PrintMethod = "printSystemPStateField";
}
let Defs = [NZCV] in
-class MSRpstateI
- : SimpleSystemI<0, (ins pstatefield_op:$pstate_field, imm0_15:$imm),
- "msr", "\t$pstate_field, $imm">,
+class MSRpstateImm0_15
+ : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm),
+ "msr", "\t$pstatefield, $imm">,
Sched<[WriteSys]> {
bits<6> pstatefield;
bits<4> imm;
@@ -913,6 +968,37 @@ class MSRpstateI
let Inst{7-5} = pstatefield{2-0};
let DecoderMethod = "DecodeSystemPStateInstruction";
+ // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+ // Fail, the decoder should attempt to decode the instruction as MSRI.
+ let hasCompleteDecoder = 0;
+}
+
+def SystemPStateFieldWithImm0_1Operand : AsmOperandClass {
+ let Name = "SystemPStateFieldWithImm0_1";
+ let ParserMethod = "tryParseSysReg";
+}
+def pstatefield1_op : Operand<i32> {
+ let ParserMatchClass = SystemPStateFieldWithImm0_1Operand;
+ let PrintMethod = "printSystemPStateField";
+}
+
+let Defs = [NZCV] in
+class MSRpstateImm0_1
+ : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm),
+ "msr", "\t$pstatefield, $imm">,
+ Sched<[WriteSys]> {
+ bits<6> pstatefield;
+ bit imm;
+ let Inst{20-19} = 0b00;
+ let Inst{18-16} = pstatefield{5-3};
+ let Inst{15-9} = 0b0100000;
+ let Inst{8} = imm;
+ let Inst{7-5} = pstatefield{2-0};
+
+ let DecoderMethod = "DecodeSystemPStateInstruction";
+ // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns
+ // Fail, the decoder should attempt to decode the instruction as MSRI.
+ let hasCompleteDecoder = 0;
}
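As an illustration of what the new 1-bit form accepts (not part of the patch; PAN is an ARMv8.1-A PSTATE field and needs an assembler that knows it; the function name is arbitrary):

// Hypothetical inline-asm use of a 1-bit PSTATE field such as PAN.
static inline void enable_pan() { asm volatile("msr pan, #1"); }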
// SYS and SYSL generic system instructions.
@@ -1341,7 +1427,7 @@ multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> {
}
class ShiftAlias<string asm, Instruction inst, RegisterClass regtype>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst regtype:$dst, regtype:$src1, regtype:$src2), 0>;
class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
@@ -1407,13 +1493,13 @@ class MulHi<bits<3> opc, string asm, SDNode OpNode>
}
class MulAccumWAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>;
class MulAccumXAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>;
class WideMulAccumAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>;
class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg,
@@ -1643,7 +1729,7 @@ class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype,
class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
RegisterClass src1Regtype, RegisterClass src2Regtype,
int shiftExt>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2,
shiftExt)>;
@@ -1701,10 +1787,10 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
}
// add Rd, Rb, -imm -> sub Rd, Rn, imm
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1776,43 +1862,43 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
} // Defs = [NZCV]
// Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#" $Rd, $Rn, $imm",
+ def : InstAlias<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
// Compare aliases
- def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
+ def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>;
- def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Xri")
+ def : InstAlias<cmp#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Wrx")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrx")
WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx")
XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx64")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx64")
XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Wrs")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Wrs")
WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>;
- def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
+ def : InstAlias<cmp#"\t$src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
// Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
- def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
+ def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<cmpAlias#" $src, $imm", (!cast<Instruction>(NAME#"Xri")
+ def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
// Compare shorthands
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrs")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrs")
WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, 0), 5>;
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrx")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Wrx")
WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>;
- def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
+ def : InstAlias<cmp#"\t$src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>;
// Register/register aliases with no shift when SP is not used.
@@ -1998,7 +2084,7 @@ class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype,
// Aliases for register+register logical instructions.
class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
- : InstAlias<asm#" $dst, $src1, $src2",
+ : InstAlias<asm#"\t$dst, $src1, $src2",
(inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
@@ -2017,10 +2103,10 @@ multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
let Inst{31} = 1;
}
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2039,10 +2125,10 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
}
} // end Defs = [NZCV]
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2105,9 +2191,12 @@ multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
+class BaseCondComparisonImm<bit op, RegisterClass regtype, ImmLeaf immtype,
+ string mnemonic, SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $imm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
Sched<[WriteI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2127,19 +2216,13 @@ class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
let Inst{3-0} = nzcv;
}
-multiclass CondSetFlagsImm<bit op, string asm> {
- def Wi : BaseCondSetFlagsImm<op, GPR32, asm> {
- let Inst{31} = 0;
- }
- def Xi : BaseCondSetFlagsImm<op, GPR64, asm> {
- let Inst{31} = 1;
- }
-}
-
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+class BaseCondComparisonReg<bit op, RegisterClass regtype, string mnemonic,
+ SDNode OpNode>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "",
+ [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]>,
Sched<[WriteI, ReadI, ReadI]> {
let Uses = [NZCV];
let Defs = [NZCV];
@@ -2159,11 +2242,19 @@ class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
let Inst{3-0} = nzcv;
}
-multiclass CondSetFlagsReg<bit op, string asm> {
- def Wr : BaseCondSetFlagsReg<op, GPR32, asm> {
+multiclass CondComparison<bit op, string mnemonic, SDNode OpNode> {
+ // immediate operand variants
+ def Wi : BaseCondComparisonImm<op, GPR32, imm32_0_31, mnemonic, OpNode> {
let Inst{31} = 0;
}
- def Xr : BaseCondSetFlagsReg<op, GPR64, asm> {
+ def Xi : BaseCondComparisonImm<op, GPR64, imm0_31, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ // register operand variants
+ def Wr : BaseCondComparisonReg<op, GPR32, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondComparisonReg<op, GPR64, mnemonic, OpNode> {
let Inst{31} = 1;
}
}
@@ -2328,7 +2419,7 @@ multiclass LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
asm, pattern>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2340,7 +2431,7 @@ multiclass StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
asm, pattern>,
Sched<[WriteST]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2508,7 +2599,7 @@ class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
}
class ROInstAlias<string asm, RegisterClass regtype, Instruction INST>
- : InstAlias<asm # " $Rt, [$Rn, $Rm]",
+ : InstAlias<asm # "\t$Rt, [$Rn, $Rm]",
(INST regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, 0, 0)>;
multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
@@ -2934,7 +3025,7 @@ multiclass LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
(ins GPR64sp:$Rn, simm9:$offset), asm, pattern>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2946,7 +3037,7 @@ multiclass StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
asm, pattern>,
Sched<[WriteST]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2958,7 +3049,7 @@ multiclass PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm,
asm, pat>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -2993,7 +3084,7 @@ multiclass LoadUnprivileged<bits<2> sz, bit V, bits<2> opc,
(ins GPR64sp:$Rn, simm9:$offset), asm>,
Sched<[WriteLD]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -3005,7 +3096,7 @@ multiclass StoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
asm>,
Sched<[WriteST]>;
- def : InstAlias<asm # " $Rt, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
@@ -3136,7 +3227,7 @@ multiclass LoadPairOffset<bits<2> opc, bit V, RegisterClass regtype,
(ins GPR64sp:$Rn, indextype:$offset), asm>,
Sched<[WriteLD, WriteLDHi]>;
- def : InstAlias<asm # " $Rt, $Rt2, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
GPR64sp:$Rn, 0)>;
}
@@ -3151,7 +3242,7 @@ multiclass StorePairOffset<bits<2> opc, bit V, RegisterClass regtype,
asm>,
Sched<[WriteSTP]>;
- def : InstAlias<asm # " $Rt, $Rt2, [$Rn]",
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
(!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
GPR64sp:$Rn, 0)>;
}
@@ -3230,8 +3321,8 @@ class LoadPairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
let mayStore = 1, mayLoad = 0 in
class StorePairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
Operand idxtype, string asm>
- : BaseLoadStorePairPostIdx<opc, V, 0, (outs),
- (ins GPR64sp:$wback, regtype:$Rt, regtype:$Rt2,
+ : BaseLoadStorePairPostIdx<opc, V, 0, (outs GPR64sp:$wback),
+ (ins regtype:$Rt, regtype:$Rt2,
GPR64sp:$Rn, idxtype:$offset),
asm>,
Sched<[WriteAdr, WriteSTP]>;
@@ -3477,6 +3568,20 @@ class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode,
multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
+ // Unscaled half-precision to 32-bit
+ def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm,
+ [(set GPR32:$Rd, (OpN FPR16:$Rn))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ // Unscaled half-precision to 64-bit
+ def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm,
+ [(set GPR64:$Rd, (OpN FPR16:$Rn))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
// Unscaled single-precision to 32-bit
def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm,
[(set GPR32:$Rd, (OpN FPR32:$Rn))]> {
@@ -3504,6 +3609,25 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
SDPatternOperator OpN> {
+ // Scaled half-precision to 32-bit
+ def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32,
+ fixedpoint_f16_i32, asm,
+ [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn,
+ fixedpoint_f16_i32:$scale)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let scale{5} = 1;
+ let Predicates = [HasFullFP16];
+ }
+
+ // Scaled half-precision to 64-bit
+ def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64,
+ fixedpoint_f16_i64, asm,
+ [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn,
+ fixedpoint_f16_i64:$scale)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Predicates = [HasFullFP16];
+ }
+
// Scaled single-precision to 32-bit
def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32,
fixedpoint_f32_i32, asm,
@@ -3553,7 +3677,7 @@ class BaseIntegerToFP<bit isUnsigned,
bits<5> Rd;
bits<5> Rn;
bits<6> scale;
- let Inst{30-23} = 0b00111100;
+ let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b00001;
let Inst{16} = isUnsigned;
let Inst{15-10} = scale;
@@ -3570,7 +3694,7 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
bits<5> Rd;
bits<5> Rn;
bits<6> scale;
- let Inst{30-23} = 0b00111100;
+ let Inst{30-24} = 0b0011110;
let Inst{21-17} = 0b10001;
let Inst{16} = isUnsigned;
let Inst{15-10} = 0b000000;
@@ -3580,33 +3704,55 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
// Unscaled
+ def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
}
def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
}
// Scaled
+ def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_f16_i32, asm,
+ [(set FPR16:$Rd,
+ (fdiv (node GPR32:$Rn),
+ fixedpoint_f16_i32:$scale))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let scale{5} = 1;
+ let Predicates = [HasFullFP16];
+ }
+
def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
[(set FPR32:$Rd,
(fdiv (node GPR32:$Rn),
fixedpoint_f32_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
let scale{5} = 1;
}
@@ -3615,16 +3761,25 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
(fdiv (node GPR32:$Rn),
fixedpoint_f64_i32:$scale))]> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
let scale{5} = 1;
}
+ def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_f16_i64, asm,
+ [(set FPR16:$Rd,
+ (fdiv (node GPR64:$Rn),
+ fixedpoint_f16_i64:$scale))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
[(set FPR32:$Rd,
(fdiv (node GPR64:$Rn),
fixedpoint_f32_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
@@ -3632,7 +3787,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
(fdiv (node GPR64:$Rn),
fixedpoint_f64_i64:$scale))]> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
}
}
@@ -3654,7 +3809,7 @@ class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode,
Sched<[WriteFCopy]> {
bits<5> Rd;
bits<5> Rn;
- let Inst{30-23} = 0b00111100;
+ let Inst{30-24} = 0b0011110;
let Inst{21} = 1;
let Inst{20-19} = rmode;
let Inst{18-16} = opcode;
@@ -3704,26 +3859,49 @@ class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode,
}
-
multiclass UnscaledConversion<string asm> {
+ def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
+ }
+
+ def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
+ }
+
+ def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{23-22} = 0b11; // 16-bit FPR flag
+ let Predicates = [HasFullFP16];
}
def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> {
let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
+ let Inst{23-22} = 0b00; // 32-bit FPR flag
}
def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> {
let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
+ let Inst{23-22} = 0b01; // 64-bit FPR flag
}
def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128,
@@ -3796,7 +3974,7 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
Sched<[WriteF]> {
bits<5> Rd;
bits<5> Rn;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21-19} = 0b100;
let Inst{18-15} = opcode;
let Inst{14-10} = 0b10000;
@@ -3806,12 +3984,17 @@ class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
multiclass SingleOperandFPData<bits<4> opcode, string asm,
SDPatternOperator node = null_frag> {
+ def Hr : BaseSingleOperandFPData<opcode, FPR16, f16, asm, node> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
@@ -3828,7 +4011,7 @@ class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-12} = opcode;
@@ -3839,28 +4022,41 @@ class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
multiclass TwoOperandFPData<bits<4> opcode, string asm,
SDPatternOperator node = null_frag> {
+ def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
+ [(set (f16 FPR16:$Rd),
+ (node (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
[(set (f32 FPR32:$Rd),
(node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
[(set (f64 FPR64:$Rd),
(node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
+ def Hrr : BaseTwoOperandFPData<opcode, FPR16, asm,
+ [(set FPR16:$Rd, (fneg (node FPR16:$Rn, (f16 FPR16:$Rm))))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
[(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
[(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
@@ -3878,7 +4074,7 @@ class BaseThreeOperandFPData<bit isNegated, bit isSub,
bits<5> Rn;
bits<5> Rm;
bits<5> Ra;
- let Inst{31-23} = 0b000111110;
+ let Inst{31-24} = 0b00011111;
let Inst{21} = isNegated;
let Inst{20-16} = Rm;
let Inst{15} = isSub;
@@ -3889,16 +4085,23 @@ class BaseThreeOperandFPData<bit isNegated, bit isSub,
multiclass ThreeOperandFPData<bit isNegated, bit isSub,string asm,
SDPatternOperator node> {
+ def Hrrr : BaseThreeOperandFPData<isNegated, isSub, FPR16, asm,
+ [(set FPR16:$Rd,
+ (node (f16 FPR16:$Rn), (f16 FPR16:$Rm), (f16 FPR16:$Ra)))]> {
+ let Inst{23-22} = 0b11; // 16-bit size flag
+ let Predicates = [HasFullFP16];
+ }
+
def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm,
[(set FPR32:$Rd,
(node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> {
- let Inst{22} = 0; // 32-bit size flag
+ let Inst{23-22} = 0b00; // 32-bit size flag
}
def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm,
[(set FPR64:$Rd,
(node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> {
- let Inst{22} = 1; // 64-bit size flag
+ let Inst{23-22} = 0b01; // 64-bit size flag
}
}
@@ -3913,7 +4116,7 @@ class BaseOneOperandFPComparison<bit signalAllNans,
: I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>,
Sched<[WriteFCmp]> {
bits<5> Rn;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{15-10} = 0b001000;
@@ -3932,7 +4135,7 @@ class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
Sched<[WriteFCmp]> {
bits<5> Rm;
bits<5> Rn;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-10} = 0b001000;
@@ -3944,24 +4147,36 @@ class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
multiclass FPComparison<bit signalAllNans, string asm,
SDPatternOperator OpNode = null_frag> {
let Defs = [NZCV] in {
+ def Hrr : BaseTwoOperandFPComparison<signalAllNans, FPR16, asm,
+ [(OpNode FPR16:$Rn, (f16 FPR16:$Rm)), (implicit NZCV)]> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
+ def Hri : BaseOneOperandFPComparison<signalAllNans, FPR16, asm,
+ [(OpNode (f16 FPR16:$Rn), fpimm0), (implicit NZCV)]> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm,
[(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit NZCV)]> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm,
[(OpNode (f32 FPR32:$Rn), fpimm0), (implicit NZCV)]> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm,
[(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit NZCV)]> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm,
[(OpNode (f64 FPR64:$Rn), fpimm0), (implicit NZCV)]> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
} // Defs = [NZCV]
}
@@ -3971,17 +4186,20 @@ multiclass FPComparison<bit signalAllNans, string asm,
//---
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseFPCondComparison<bit signalAllNans,
- RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+class BaseFPCondComparison<bit signalAllNans, RegisterClass regtype,
+ string mnemonic, list<dag> pat>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond),
+ mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>,
Sched<[WriteFCmp]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
+
bits<5> Rn;
bits<5> Rm;
bits<4> nzcv;
bits<4> cond;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-12} = cond;
@@ -3991,16 +4209,24 @@ class BaseFPCondComparison<bit signalAllNans,
let Inst{3-0} = nzcv;
}
-multiclass FPCondComparison<bit signalAllNans, string asm> {
- let Defs = [NZCV], Uses = [NZCV] in {
- def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> {
- let Inst{22} = 0;
+multiclass FPCondComparison<bit signalAllNans, string mnemonic,
+ SDPatternOperator OpNode = null_frag> {
+ def Hrr : BaseFPCondComparison<signalAllNans, FPR16, mnemonic, []> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
}
- def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> {
- let Inst{22} = 1;
+ def Srr : BaseFPCondComparison<signalAllNans, FPR32, mnemonic,
+ [(set NZCV, (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
+ let Inst{23-22} = 0b00;
+ }
+
+ def Drr : BaseFPCondComparison<signalAllNans, FPR64, mnemonic,
+ [(set NZCV, (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm), (i32 imm:$nzcv),
+ (i32 imm:$cond), NZCV))]> {
+ let Inst{23-22} = 0b01;
}
- } // Defs = [NZCV], Uses = [NZCV]
}
//---
@@ -4019,7 +4245,7 @@ class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm>
bits<5> Rm;
bits<4> cond;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-16} = Rm;
let Inst{15-12} = cond;
@@ -4030,12 +4256,17 @@ class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm>
multiclass FPCondSelect<string asm> {
let Uses = [NZCV] in {
+ def Hrrr : BaseFPCondSelect<FPR16, f16, asm> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
def Srrr : BaseFPCondSelect<FPR32, f32, asm> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Drrr : BaseFPCondSelect<FPR64, f64, asm> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
} // Uses = [NZCV]
}
@@ -4050,7 +4281,7 @@ class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm>
Sched<[WriteFImm]> {
bits<5> Rd;
bits<8> imm;
- let Inst{31-23} = 0b000111100;
+ let Inst{31-24} = 0b00011110;
let Inst{21} = 1;
let Inst{20-13} = imm;
let Inst{12-5} = 0b10000000;
@@ -4058,12 +4289,17 @@ class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm>
}
multiclass FPMoveImmediate<string asm> {
+ def Hi : BaseFPMoveImmediate<FPR16, fpimm16, asm> {
+ let Inst{23-22} = 0b11;
+ let Predicates = [HasFullFP16];
+ }
+
def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> {
- let Inst{22} = 0;
+ let Inst{23-22} = 0b00;
}
def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> {
- let Inst{22} = 1;
+ let Inst{23-22} = 0b01;
}
}
} // end of 'let Predicates = [HasFPARMv8]'
@@ -4079,7 +4315,7 @@ let Predicates = [HasNEON] in {
//----------------------------------------------------------------------------
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
RegisterOperand regtype, string asm, string kind,
list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
@@ -4093,8 +4329,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{30} = Q;
let Inst{29} = U;
let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
+ let Inst{23-21} = size;
let Inst{20-16} = Rm;
let Inst{15-11} = opcode;
let Inst{10} = 1;
@@ -4103,7 +4338,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
}
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
RegisterOperand regtype, string asm, string kind,
list<dag> pattern>
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
@@ -4117,8 +4352,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{30} = Q;
let Inst{29} = U;
let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
+ let Inst{23-21} = size;
let Inst{20-16} = Rm;
let Inst{15-11} = opcode;
let Inst{10} = 1;
@@ -4129,25 +4363,25 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
// All operand sizes distinguished in the encoding.
multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
asm, ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
asm, ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
asm, ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
asm, ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
asm, ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
asm, ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
- def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128,
+ def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128,
asm, ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
}
@@ -4155,49 +4389,49 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
// As above, but D sized elements unsupported.
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
asm, ".8b",
[(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
asm, ".16b",
[(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>;
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
asm, ".4h",
[(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
asm, ".8h",
[(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
asm, ".2s",
[(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
asm, ".4s",
[(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
}
multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64,
asm, ".8b",
[(set (v8i8 V64:$dst),
(OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128,
asm, ".16b",
[(set (v16i8 V128:$dst),
(OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
- def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64,
asm, ".4h",
[(set (v4i16 V64:$dst),
(OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128,
asm, ".8h",
[(set (v8i16 V128:$dst),
(OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64,
asm, ".2s",
[(set (v2i32 V64:$dst),
(OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128,
asm, ".4s",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
@@ -4206,54 +4440,80 @@ multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
// As above, but only B sized elements supported.
multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64,
asm, ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128,
asm, ".16b",
[(set (v16i8 V128:$Rd),
(OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
}
-// As above, but only S and D sized floating point elements supported.
-multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc,
+// As above, but only floating point elements supported.
+multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
string asm, SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
asm, ".2s",
[(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
asm, ".4s",
[(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
asm, ".2d",
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
}
-multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc,
+multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
asm, ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
asm, ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
asm, ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
}
-multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
+multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<3> opc,
string asm, SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64,
+ asm, ".4h",
+ [(set (v4f16 V64:$dst),
+ (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>;
+ def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128,
+ asm, ".8h",
+ [(set (v8f16 V128:$dst),
+ (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64,
asm, ".2s",
[(set (v2f32 V64:$dst),
(OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128,
asm, ".4s",
[(set (v4f32 V128:$dst),
(OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128,
asm, ".2d",
[(set (v2f64 V128:$dst),
(OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
@@ -4262,16 +4522,16 @@ multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
// As above, but D and B sized elements unsupported.
multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64,
asm, ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128,
asm, ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64,
asm, ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128,
asm, ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
}
@@ -4279,10 +4539,10 @@ multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
// Logical three vector ops share opcode bits, and only use B sized elements.
multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
SDPatternOperator OpNode = null_frag> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64,
+ def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64,
asm, ".8b",
[(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128,
+ def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128,
asm, ".16b",
[(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
@@ -4303,11 +4563,11 @@ multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64,
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64,
asm, ".8b",
[(set (v8i8 V64:$dst),
(OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128,
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128,
asm, ".16b",
[(set (v16i8 V128:$dst),
(OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
@@ -4347,8 +4607,8 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string dstkind,
- string srckind, list<dag> pattern>
+ bits<2> size2, RegisterOperand regtype, string asm,
+ string dstkind, string srckind, list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
@@ -4360,7 +4620,9 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -4369,8 +4631,9 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string dstkind,
- string srckind, list<dag> pattern>
+ bits<2> size2, RegisterOperand regtype,
+ string asm, string dstkind, string srckind,
+ list<dag> pattern>
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
@@ -4382,7 +4645,9 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -4392,22 +4657,22 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
// Supports B, H, and S element sizes.
multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
}
@@ -4450,49 +4715,49 @@ multiclass SIMDVectorLShiftLongBySizeBHS {
// Supports all element sizes.
multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".4h", ".8b",
[(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".8h", ".16b",
[(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".2s", ".4h",
[(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".4s", ".8h",
[(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
asm, ".1d", ".2s",
[(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
asm, ".2d", ".4s",
[(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
}
multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+ def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
asm, ".4h", ".8b",
[(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
(v8i8 V64:$Rn)))]>;
- def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+ def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
asm, ".8h", ".16b",
[(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
(v16i8 V128:$Rn)))]>;
- def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+ def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
asm, ".2s", ".4h",
[(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
(v4i16 V64:$Rn)))]>;
- def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+ def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
asm, ".4s", ".8h",
[(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
(v8i16 V128:$Rn)))]>;
- def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+ def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
asm, ".1d", ".2s",
[(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
(v2i32 V64:$Rn)))]>;
- def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+ def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
asm, ".2d", ".4s",
[(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
(v4i32 V128:$Rn)))]>;
@@ -4501,50 +4766,50 @@ multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
// Supports all element sizes, except 1xD.
multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
- def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128,
+ def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
}
multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
- def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128,
+ def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
}
@@ -4553,10 +4818,10 @@ multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
// Supports only B element sizes.
multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
@@ -4565,16 +4830,16 @@ multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
// Supports only B and H element sizes.
multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64,
asm, ".8b", ".8b",
[(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128,
asm, ".16b", ".16b",
[(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64,
asm, ".4h", ".4h",
[(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128,
asm, ".8h", ".8h",
[(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
}
@@ -4583,13 +4848,21 @@ multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
// as an extra opcode bit.
multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
@@ -4597,10 +4870,10 @@ multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
// Supports only S element size.
multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
}
@@ -4608,26 +4881,42 @@ multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
}
multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64,
asm, ".2s", ".2s",
[(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128,
asm, ".4s", ".4s",
[(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128,
asm, ".2d", ".2d",
[(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
}
@@ -4706,10 +4995,10 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
}
-class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype,
- string asm, string kind, string zero,
- ValueType dty, ValueType sty, SDNode OpNode>
+class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
+ bits<5> opcode, RegisterOperand regtype, string asm,
+ string kind, string zero, ValueType dty,
+ ValueType sty, SDNode OpNode>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
"|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
@@ -4722,7 +5011,9 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -4732,54 +5023,74 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
// Comparisons support all element sizes, except 1xD.
multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
SDNode OpNode> {
- def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64,
+ def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64,
asm, ".8b", "0",
v8i8, v8i8, OpNode>;
- def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128,
+ def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128,
asm, ".16b", "0",
v16i8, v16i8, OpNode>;
- def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64,
+ def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64,
asm, ".4h", "0",
v4i16, v4i16, OpNode>;
- def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128,
+ def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128,
asm, ".8h", "0",
v8i16, v8i16, OpNode>;
- def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64,
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64,
asm, ".2s", "0",
v2i32, v2i32, OpNode>;
- def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128,
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128,
asm, ".4s", "0",
v4i32, v4i32, OpNode>;
- def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128,
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128,
asm, ".2d", "0",
v2i64, v2i64, OpNode>;
}
-// FP Comparisons support only S and D element sizes.
+// FP Comparisons support only S and D element sizes (and H for v8.2a).
multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
string asm, SDNode OpNode> {
- def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64,
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64,
+ asm, ".4h", "0.0",
+ v4i16, v4f16, OpNode>;
+ def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128,
+ asm, ".8h", "0.0",
+ v8i16, v8f16, OpNode>;
+ } // Predicates = [HasNEON, HasFullFP16]
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64,
asm, ".2s", "0.0",
v2i32, v2f32, OpNode>;
- def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128,
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128,
asm, ".4s", "0.0",
v4i32, v4f32, OpNode>;
- def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128,
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128,
asm, ".2d", "0.0",
v2i64, v2f64, OpNode>;
- def : InstAlias<asm # " $Vd.2s, $Vn.2s, #0",
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # "\t$Vd.4h, $Vn.4h, #0",
+ (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # "\t$Vd.8h, $Vn.8h, #0",
+ (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+ }
+ def : InstAlias<asm # "\t$Vd.2s, $Vn.2s, #0",
(!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
- def : InstAlias<asm # " $Vd.4s, $Vn.4s, #0",
+ def : InstAlias<asm # "\t$Vd.4s, $Vn.4s, #0",
(!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
- def : InstAlias<asm # " $Vd.2d, $Vn.2d, #0",
+ def : InstAlias<asm # "\t$Vd.2d, $Vn.2d, #0",
(!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
- def : InstAlias<asm # ".2s $Vd, $Vn, #0",
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # ".4h\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # ".8h\t$Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>;
+ }
+ def : InstAlias<asm # ".2s\t$Vd, $Vn, #0",
(!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
- def : InstAlias<asm # ".4s $Vd, $Vn, #0",
+ def : InstAlias<asm # ".4s\t$Vd, $Vn, #0",
(!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
- def : InstAlias<asm # ".2d $Vd, $Vn, #0",
+ def : InstAlias<asm # ".2d\t$Vd, $Vn, #0",
(!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
}
@@ -5325,7 +5636,7 @@ multiclass SIMDZipVector<bits<3>opc, string asm,
//----------------------------------------------------------------------------
let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
RegisterClass regtype, string asm,
list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
@@ -5337,8 +5648,7 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{31-30} = 0b01;
let Inst{29} = U;
let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
+ let Inst{23-21} = size;
let Inst{20-16} = Rm;
let Inst{15-11} = opcode;
let Inst{10} = 1;
@@ -5369,17 +5679,17 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+ def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
}
multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+ def v1i64 : BaseSIMDThreeScalar<U, 0b111, opc, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
- def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, []>;
- def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
- def v1i8 : BaseSIMDThreeScalar<U, 0b00, opc, FPR8 , asm, []>;
+ def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm, []>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
+ def v1i8 : BaseSIMDThreeScalar<U, 0b001, opc, FPR8 , asm, []>;
def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
@@ -5389,9 +5699,9 @@ multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm,
+ def v1i32 : BaseSIMDThreeScalar<U, 0b101, opc, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
- def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
}
multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
@@ -5404,26 +5714,34 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
asm, []>;
}
-multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ [(set FPR16:$Rd, (OpNode FPR16:$Rn, FPR16:$Rm))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
}
def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
}
-multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
+ []>;
+ } // Predicates = [HasNEON, HasFullFP16]
}
def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
@@ -5482,7 +5800,7 @@ multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm,
//----------------------------------------------------------------------------
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
RegisterClass regtype, RegisterClass regtype2,
string asm, list<dag> pat>
: I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
@@ -5494,7 +5812,9 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -5523,7 +5843,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
+class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
RegisterClass regtype, string asm, string zero>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"\t$Rd, $Rn, #" # zero, "", []>,
@@ -5534,7 +5854,9 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{29} = U;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21} = 0b1;
+ let Inst{20-19} = size2;
+ let Inst{18-17} = 0b00;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
let Inst{9-5} = Rn;
@@ -5556,21 +5878,28 @@ class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, opc, FPR64, asm, "0">;
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, 0b00, opc, FPR64, asm, "0">;
def : Pat<(v1i64 (OpNode FPR64:$Rn)),
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
}
-multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPCmpTwoScalar<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, opc, FPR64, asm, "0.0">;
- def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, opc, FPR32, asm, "0.0">;
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b00, opc, FPR64, asm, "0.0">;
+ def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, 0b00, opc, FPR32, asm, "0.0">;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16rz : BaseSIMDCmpTwoScalar<U, {S,1}, 0b11, opc, FPR16, asm, "0.0">;
+ }
- def : InstAlias<asm # " $Rd, $Rn, #0",
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>;
- def : InstAlias<asm # " $Rd, $Rn, #0",
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
(!cast<Instruction>(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : InstAlias<asm # "\t$Rd, $Rn, #0",
+ (!cast<Instruction>(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>;
+ }
def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))),
(!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
@@ -5578,35 +5907,42 @@ multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm,
multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
- def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
[(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>;
def : Pat<(i64 (OpNode (i64 FPR64:$Rn))),
(!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
}
-multiclass SIMDTwoScalarSD<bit U, bit S, bits<5> opc, string asm> {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,[]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,[]>;
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
+ }
}
-multiclass SIMDTwoScalarCVTSD<bit U, bit S, bits<5> opc, string asm,
+multiclass SIMDFPTwoScalarCVT<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
[(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
[(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
+ [(set FPR16:$Rd, (OpNode (f16 FPR16:$Rn)))]>;
+ }
}
multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, 0b00, opc, FPR64, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
- def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR32, asm,
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR32, asm,
[(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
- def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR16, asm, []>;
- def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR16, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR8 , asm, []>;
}
def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))),
@@ -5633,10 +5969,10 @@ multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
- def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR64, asm,
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, 0b00, opc, FPR32, FPR64, asm,
[(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
- def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR32, asm, []>;
- def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR16, asm, []>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, 0b00, opc, FPR16, FPR32, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, 0b00, opc, FPR8 , FPR16, asm, []>;
}
//----------------------------------------------------------------------------
@@ -5668,10 +6004,14 @@ multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> {
asm, ".2d">;
}
-multiclass SIMDPairwiseScalarSD<bit U, bit S, bits<5> opc, string asm> {
- def v2i32p : BaseSIMDPairwiseScalar<U, {S,0}, opc, FPR32Op, V64,
+multiclass SIMDFPPairwiseScalar<bit S, bits<5> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64,
+ asm, ".2h">;
+ }
+ def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64,
asm, ".2s">;
- def v2i64p : BaseSIMDPairwiseScalar<U, {S,1}, opc, FPR64Op, V128,
+ def v2i64p : BaseSIMDPairwiseScalar<1, {S,1}, opc, FPR64Op, V128,
asm, ".2d">;
}
@@ -5727,8 +6067,16 @@ multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
asm, ".4s", []>;
}
-multiclass SIMDAcrossLanesS<bits<5> opcode, bit sz1, string asm,
+multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
Intrinsic intOp> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64,
+ asm, ".4h",
+ [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>;
+ def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128,
+ asm, ".8h",
+ [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>;
+ } // Predicates = [HasNEON, HasFullFP16]
def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128,
asm, ".4s",
[(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>;
@@ -5925,7 +6273,7 @@ class SIMDInsMainMovAlias<string size, Instruction inst,
class SIMDInsElementMovAlias<string size, Instruction inst,
Operand idxtype>
: InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" #
- # "|" # size #" $dst$idx, $src$idx2}",
+ # "|" # size #"\t$dst$idx, $src$idx2}",
(inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>;
@@ -6215,7 +6563,7 @@ multiclass SIMDScalarCPY<string asm> {
// AdvSIMD modified immediate instructions
//----------------------------------------------------------------------------
-class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
+class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
string asm, string op_string,
string cstr, list<dag> pattern>
: I<oops, iops, asm, op_string, cstr, pattern>,
@@ -6227,16 +6575,17 @@ class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
let Inst{29} = op;
let Inst{28-19} = 0b0111100000;
let Inst{18-16} = imm8{7-5};
- let Inst{11-10} = 0b01;
+ let Inst{11} = op2;
+ let Inst{10} = 1;
let Inst{9-5} = imm8{4-0};
let Inst{4-0} = Rd;
}
-class BaseSIMDModifiedImmVector<bit Q, bit op, RegisterOperand vectype,
+class BaseSIMDModifiedImmVector<bit Q, bit op, bit op2, RegisterOperand vectype,
Operand immtype, dag opt_shift_iop,
string opt_shift, string asm, string kind,
list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs vectype:$Rd),
+ : BaseSIMDModifiedImm<Q, op, op2, (outs vectype:$Rd),
!con((ins immtype:$imm8), opt_shift_iop), asm,
"{\t$Rd" # kind # ", $imm8" # opt_shift #
"|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
@@ -6248,7 +6597,7 @@ class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
Operand immtype, dag opt_shift_iop,
string opt_shift, string asm, string kind,
list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs vectype:$dst),
+ : BaseSIMDModifiedImm<Q, op, 0, (outs vectype:$dst),
!con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop),
asm, "{\t$Rd" # kind # ", $imm8" # opt_shift #
"|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
@@ -6259,7 +6608,7 @@ class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12,
RegisterOperand vectype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
(ins logical_vec_shift:$shift),
"$shift", asm, kind, pattern> {
bits<2> shift;
@@ -6284,7 +6633,7 @@ class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12,
class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12,
RegisterOperand vectype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
(ins logical_vec_hw_shift:$shift),
"$shift", asm, kind, pattern> {
bits<2> shift;
@@ -6349,7 +6698,7 @@ multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode,
class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
RegisterOperand vectype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ : BaseSIMDModifiedImmVector<Q, op, 0, vectype, imm0_255,
(ins move_vec_shift:$shift),
"$shift", asm, kind, pattern> {
bits<1> shift;
@@ -6357,18 +6706,18 @@ class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
let Inst{12} = shift;
}
-class SIMDModifiedImmVectorNoShift<bit Q, bit op, bits<4> cmode,
+class SIMDModifiedImmVectorNoShift<bit Q, bit op, bit op2, bits<4> cmode,
RegisterOperand vectype,
Operand imm_type, string asm,
string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm_type, (ins), "",
+ : BaseSIMDModifiedImmVector<Q, op, op2, vectype, imm_type, (ins), "",
asm, kind, pattern> {
let Inst{15-12} = cmode;
}
class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm,
list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
+ : BaseSIMDModifiedImm<Q, op, 0, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
"\t$Rd, $imm8", "", pattern> {
let Inst{15-12} = cmode;
let DecoderMethod = "DecodeModImmInstruction";
@@ -6438,8 +6787,36 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
let Inst{4-0} = Rd;
}
-multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
+multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc,
+ V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4f16 V64:$Rd),
+ (OpNode (v4f16 V64:$Rn),
+ (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8f16 V128:$Rd),
+ (OpNode (v8f16 V128:$Rn),
+ (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
V64, V64,
V128, VectorIndexS,
@@ -6476,6 +6853,21 @@ multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
let Inst{21} = 0;
}
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h",
+ [(set (f16 FPR16Op:$Rd),
+ (OpNode (f16 FPR16Op:$Rn),
+ (f16 (vector_extract (v8f16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
FPR32Op, FPR32Op, V128, VectorIndexS,
asm, ".s", "", "", ".s",
@@ -6501,7 +6893,7 @@ multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
}
}
-multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> {
+multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
// 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(AArch64duplane32 (v4f32 V128:$Rm),
@@ -6553,7 +6945,28 @@ multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> {
V128:$Rm, VectorIndexD:$idx)>;
}
-multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> {
+multiclass SIMDFPIndexedTied<bit U, bits<4> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64,
V128, VectorIndexS,
asm, ".2s", ".2s", ".2s", ".s", []> {
@@ -6580,6 +6993,16 @@ multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> {
let Inst{21} = 0;
}
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
FPR32Op, FPR32Op, V128, VectorIndexS,
@@ -7117,7 +7540,13 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
}
-multiclass SIMDScalarRShiftSD<bit U, bits<5> opc, string asm> {
+multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR16, vecshiftR16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR32, vecshiftR32, asm, []> {
let Inst{20-16} = imm{4-0};
@@ -7297,6 +7726,23 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
Intrinsic OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
@@ -7322,8 +7768,26 @@ multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
}
}
-multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm,
+multiclass SIMDVectorRShiftToFP<bit U, bits<5> opc, string asm,
Intrinsic OpNode> {
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+ } // Predicates = [HasNEON, HasFullFP16]
+
def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
V64, V64, vecshiftR32,
asm, ".2s", ".2s",
@@ -8604,9 +9068,8 @@ let Predicates = [HasNEON, HasV8_1a] in {
class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand regtype, string asm,
string kind, list<dag> pattern>
- : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+ : BaseSIMDThreeSameVectorTied<Q, U, {size,0}, opcode, regtype, asm, kind,
pattern> {
- let Inst{21}=0;
}
multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
SDPatternOperator Accum> {
@@ -9041,6 +9504,7 @@ def : TokenAlias<".8H", ".8h">;
def : TokenAlias<".4S", ".4s">;
def : TokenAlias<".2D", ".2d">;
def : TokenAlias<".1Q", ".1q">;
+def : TokenAlias<".2H", ".2h">;
def : TokenAlias<".B", ".b">;
def : TokenAlias<".H", ".h">;
def : TokenAlias<".S", ".s">;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c0b3f2c..3ef3c8b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
-#include "AArch64MachineCombinerPattern.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -533,6 +532,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
CC);
}
+/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
+static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
+ uint64_t Imm = MI->getOperand(1).getImm();
+ uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t Encoding;
+ return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
+}
+
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
@@ -573,6 +580,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
case AArch64::ORRWrr:
case AArch64::ORRXrr:
return true;
+ // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
+ // ORRXri, it is as cheap as MOV
+ case AArch64::MOVi32imm:
+ return canBeExpandedToORR(MI, 32);
+ case AArch64::MOVi64imm:
+ return canBeExpandedToORR(MI, 64);
}
llvm_unreachable("Unknown opcode to check as cheap as a move!");
@@ -1379,42 +1392,34 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
Width = 1;
Scale = 1;
break;
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ Scale = Width = 16;
+ break;
case AArch64::LDRXui:
+ case AArch64::LDRDui:
case AArch64::STRXui:
+ case AArch64::STRDui:
Scale = Width = 8;
break;
case AArch64::LDRWui:
+ case AArch64::LDRSui:
case AArch64::STRWui:
+ case AArch64::STRSui:
Scale = Width = 4;
break;
- case AArch64::LDRBui:
- case AArch64::STRBui:
- Scale = Width = 1;
- break;
case AArch64::LDRHui:
+ case AArch64::LDRHHui:
case AArch64::STRHui:
+ case AArch64::STRHHui:
Scale = Width = 2;
break;
- case AArch64::LDRSui:
- case AArch64::STRSui:
- Scale = Width = 4;
- break;
- case AArch64::LDRDui:
- case AArch64::STRDui:
- Scale = Width = 8;
- break;
- case AArch64::LDRQui:
- case AArch64::STRQui:
- Scale = Width = 16;
- break;
+ case AArch64::LDRBui:
case AArch64::LDRBBui:
+ case AArch64::STRBui:
case AArch64::STRBBui:
Scale = Width = 1;
break;
- case AArch64::LDRHHui:
- case AArch64::STRHHui:
- Scale = Width = 2;
- break;
};
BaseReg = LdSt->getOperand(1).getReg();
@@ -1445,23 +1450,43 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
MachineInstr *Second) const {
- // Cyclone can fuse CMN, CMP followed by Bcc.
-
- // FIXME: B0 can also fuse:
- // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
- if (Second->getOpcode() != AArch64::Bcc)
- return false;
- switch (First->getOpcode()) {
- default:
- return false;
- case AArch64::SUBSWri:
- case AArch64::ADDSWri:
- case AArch64::ANDSWri:
- case AArch64::SUBSXri:
- case AArch64::ADDSXri:
- case AArch64::ANDSXri:
- return true;
+ if (Subtarget.isCyclone()) {
+ // Cyclone can fuse CMN, CMP, TST followed by Bcc.
+ unsigned SecondOpcode = Second->getOpcode();
+ if (SecondOpcode == AArch64::Bcc) {
+ switch (First->getOpcode()) {
+ default:
+ return false;
+ case AArch64::SUBSWri:
+ case AArch64::ADDSWri:
+ case AArch64::ANDSWri:
+ case AArch64::SUBSXri:
+ case AArch64::ADDSXri:
+ case AArch64::ANDSXri:
+ return true;
+ }
+ }
+ // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
+ SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
+ switch (First->getOpcode()) {
+ default:
+ return false;
+ case AArch64::ADDWri:
+ case AArch64::ADDXri:
+ case AArch64::ANDWri:
+ case AArch64::ANDXri:
+ case AArch64::EORWri:
+ case AArch64::EORXri:
+ case AArch64::ORRWri:
+ case AArch64::ORRXri:
+ case AArch64::SUBWri:
+ case AArch64::SUBXri:
+ return true;
+ }
+ }
}
+ return false;
}
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
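The rewritten shouldScheduleAdjacent above keeps the existing compare-plus-Bcc fusion and adds immediate ALU operations followed by CBZ/CBNZ, both gated on isCyclone(). The C below is only a sketch of where such instruction pairs commonly come from in compiled code; the exact instruction selection for this source is not asserted here.

    // Sketch: source shapes that tend to produce the fusable pairs checked above.
    int classify(unsigned x) {
      if (x == 42)        // cmp wN, #42 ; b.eq ...   -> SUBSWri + Bcc
        return 1;
      if (x & 0x80)       // tst wN, #0x80 ; b.ne ... -> ANDSWri + Bcc
        return 2;
      if (x)              // cbnz wN, ...             -> pairs with a preceding ALU op
        return 3;
      return 0;
    }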
@@ -1814,7 +1839,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
unsigned Opc = 0;
@@ -1911,7 +1936,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
@@ -2226,11 +2251,19 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
case AArch64::LDPDi:
case AArch64::STPXi:
case AArch64::STPDi:
+ case AArch64::LDNPXi:
+ case AArch64::LDNPDi:
+ case AArch64::STNPXi:
+ case AArch64::STNPDi:
+ ImmIdx = 3;
IsSigned = true;
Scale = 8;
break;
case AArch64::LDPQi:
case AArch64::STPQi:
+ case AArch64::LDNPQi:
+ case AArch64::STNPQi:
+ ImmIdx = 3;
IsSigned = true;
Scale = 16;
break;
@@ -2238,6 +2271,11 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
case AArch64::LDPSi:
case AArch64::STPWi:
case AArch64::STPSi:
+ case AArch64::LDNPWi:
+ case AArch64::LDNPSi:
+ case AArch64::STNPWi:
+ case AArch64::STNPSi:
+ ImmIdx = 3;
IsSigned = true;
Scale = 4;
break;
@@ -2457,7 +2495,7 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
bool AArch64InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -2485,76 +2523,76 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
"ADDWrr does not have register operands");
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
Found = true;
}
break;
case AArch64::ADDXrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
Found = true;
}
break;
case AArch64::SUBWrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
Found = true;
}
break;
case AArch64::SUBXrr:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
Found = true;
}
if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP2);
+ Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
Found = true;
}
break;
case AArch64::ADDWri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDWI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
Found = true;
}
break;
case AArch64::ADDXri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULADDXI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
Found = true;
}
break;
case AArch64::SUBWri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
AArch64::WZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
Found = true;
}
break;
case AArch64::SUBXri:
if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
AArch64::XZR)) {
- Patterns.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1);
+ Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
Found = true;
}
break;
@@ -2661,7 +2699,7 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+ MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
@@ -2677,13 +2715,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
default:
// signal error.
break;
- case MachineCombinerPattern::MC_MULADDW_OP1:
- case MachineCombinerPattern::MC_MULADDX_OP1:
+ case MachineCombinerPattern::MULADDW_OP1:
+ case MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
// ADD R,I,C
// ==> MADD R,A,B,C
// --- Create(MADD);
- if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) {
+ if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -2692,13 +2730,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
- case MachineCombinerPattern::MC_MULADDW_OP2:
- case MachineCombinerPattern::MC_MULADDX_OP2:
+ case MachineCombinerPattern::MULADDW_OP2:
+ case MachineCombinerPattern::MULADDX_OP2:
// MUL I=A,B,0
// ADD R,C,I
// ==> MADD R,A,B,C
// --- Create(MADD);
- if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) {
+ if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -2707,8 +2745,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MC_MULADDWI_OP1:
- case MachineCombinerPattern::MC_MULADDXI_OP1: {
+ case MachineCombinerPattern::MULADDWI_OP1:
+ case MachineCombinerPattern::MULADDXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
// ==> ORR V, ZR, Imm
@@ -2716,7 +2754,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) {
+ if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
@@ -2751,8 +2789,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
- case MachineCombinerPattern::MC_MULSUBW_OP1:
- case MachineCombinerPattern::MC_MULSUBX_OP1: {
+ case MachineCombinerPattern::MULSUBW_OP1:
+ case MachineCombinerPattern::MULSUBX_OP1: {
// MUL I=A,B,0
// SUB R,I, C
// ==> SUB V, 0, C
@@ -2760,7 +2798,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *SubRC;
unsigned SubOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) {
+ if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
SubOpc = AArch64::SUBWrr;
SubRC = &AArch64::GPR32spRegClass;
ZeroReg = AArch64::WZR;
@@ -2784,13 +2822,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
- case MachineCombinerPattern::MC_MULSUBW_OP2:
- case MachineCombinerPattern::MC_MULSUBX_OP2:
+ case MachineCombinerPattern::MULSUBW_OP2:
+ case MachineCombinerPattern::MULSUBX_OP2:
// MUL I=A,B,0
// SUB R,C,I
// ==> MSUB R,A,B,C (computes C - A*B)
// --- Create(MSUB);
- if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) {
+ if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
Opc = AArch64::MSUBWrrr;
RC = &AArch64::GPR32RegClass;
} else {
@@ -2799,8 +2837,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
- case MachineCombinerPattern::MC_MULSUBWI_OP1:
- case MachineCombinerPattern::MC_MULSUBXI_OP1: {
+ case MachineCombinerPattern::MULSUBWI_OP1:
+ case MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
// ==> ORR V, ZR, -Imm
@@ -2808,7 +2846,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
- if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
+ if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
@@ -2944,3 +2982,34 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
MI->eraseFromParent();
return true;
}
+
+std::pair<unsigned, unsigned>
+AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = AArch64II::MO_FRAGMENT;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace AArch64II;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_PAGE, "aarch64-page"},
+ {MO_PAGEOFF, "aarch64-pageoff"},
+ {MO_G3, "aarch64-g3"},
+ {MO_G2, "aarch64-g2"},
+ {MO_G1, "aarch64-g1"},
+ {MO_G0, "aarch64-g0"},
+ {MO_HI12, "aarch64-hi12"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace AArch64II;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_GOT, "aarch64-got"},
+ {MO_NC, "aarch64-nc"},
+ {MO_TLS, "aarch64-tls"},
+ {MO_CONSTPOOL, "aarch64-constant-pool"}};
+ return makeArrayRef(TargetFlags);
+}
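decomposeMachineOperandsTargetFlags above splits an operand's target flags into the exclusive MO_FRAGMENT field and the remaining OR-able bits, which is what lets the serialization hooks print one direct name plus a set of bitmask names. A self-contained sketch with invented constant values (the real AArch64II encodings are not reproduced here):

    #include <cstdio>
    #include <utility>

    // Invented values for illustration only; not the AArch64II encodings.
    constexpr unsigned kFragmentMask = 0x7; // selects page/pageoff/g0..g3/hi12
    constexpr unsigned kPageOff      = 0x2; // one value of the fragment field
    constexpr unsigned kNC           = 0x8; // a combinable bitmask flag

    static std::pair<unsigned, unsigned> decompose(unsigned TF) {
      return {TF & kFragmentMask, TF & ~kFragmentMask};
    }

    int main() {
      auto P = decompose(kPageOff | kNC);
      std::printf("direct=%#x bitmask=%#x\n", P.first, P.second); // direct=0x2 bitmask=0x8
    }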
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 68c2a28..ae02822 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -167,13 +167,13 @@ public:
/// for an instruction chain ending in <Root>. All potential patterns are
/// listed in the <Patterns> array.
bool getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns)
+ SmallVectorImpl<MachineCombinerPattern> &Patterns)
const override;
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+ MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
@@ -181,6 +181,14 @@ public:
bool useMachineCombiner() const override;
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
+
private:
void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
MachineBasicBlock *TBB,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fa1a46a..d02bc9f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -16,6 +16,8 @@
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -24,6 +26,12 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
+def HasSPE : Predicate<"Subtarget->hasSPE()">,
+ AssemblerPredicate<"FeatureSPE", "spe">;
+
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
@@ -66,6 +74,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4,
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
SDTCisVT<4, i32>]>;
+def SDT_AArch64CCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisInt<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
+def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
+ [SDTCisVT<0, i32>,
+ SDTCisFP<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisInt<3>,
+ SDTCisInt<4>,
+ SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
[SDTCisFP<0>,
SDTCisSameAs<0, 1>]>;
@@ -160,13 +182,14 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
+def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
+def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
+def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
+
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
-def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>;
-def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>;
-
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
@@ -361,6 +384,9 @@ def : InstAlias<"wfi", (HINT 0b011)>;
def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
+// v8.2a Statistical Profiling extension
+def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
+
// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
@@ -383,12 +409,17 @@ def : InstAlias<"isb", (ISB 0xf)>;
def MRS : MRSI;
def MSR : MSRI;
-def MSRpstate: MSRpstateI;
+def MSRpstateImm1 : MSRpstateImm0_1;
+def MSRpstateImm4 : MSRpstateImm0_15;
// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// The cycle counter PMC register is PMCCNTR_EL0.
+let Predicates = [HasPerfMon] in
+def : Pat<(readcyclecounter), (MRS 0xdce8)>;
+
// Generic system instructions
def SYSxt : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;
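The HasPerfMon-gated pattern above maps the generic readcyclecounter node onto an MRS read of PMCCNTR_EL0 (system register encoding 0xdce8). In Clang that node is typically reached through __builtin_readcyclecounter; whether the read is permitted at EL0 or traps depends on how the PMU has been configured, so the sketch below assumes a suitable runtime environment rather than something the pattern itself guarantees.

    #include <cstdint>

    // Sketch: a source-level way to exercise the readcyclecounter pattern above.
    uint64_t cycle_count() {
      return __builtin_readcyclecounter(); // may lower to "mrs xN, PMCCNTR_EL0" on AArch64
    }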
@@ -595,10 +626,12 @@ def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
(SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
(SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
+let AddedComplexity = 1 in {
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
(SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
(SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
+}
// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
@@ -823,7 +856,7 @@ defm AND : LogicalReg<0b00, 0, "and", and>;
defm BIC : LogicalReg<0b00, 1, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON : LogicalReg<0b10, 1, "eon",
- BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
+ BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR : LogicalReg<0b10, 0, "eor", xor>;
defm ORN : LogicalReg<0b01, 1, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
@@ -1020,13 +1053,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
-// Conditionally set flags instructions.
+// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
-defm CCMN : CondSetFlagsImm<0, "ccmn">;
-defm CCMP : CondSetFlagsImm<1, "ccmp">;
-
-defm CCMN : CondSetFlagsReg<0, "ccmn">;
-defm CCMP : CondSetFlagsReg<1, "ccmp">;
+defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
@@ -2421,6 +2451,26 @@ defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvt
defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
}
+multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> {
+ def : Pat<(i32 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int (round f32:$Rn))),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int (round f64:$Rn))),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fceil, "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fceil, "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, frnd, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, frnd, "FCVTAU">;
+
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//
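The FPToIntegerPats patterns above fold a rounding node (fceil, ffloor, ftrunc, frnd) followed by an fp-to-int conversion into the single rounding-convert instructions (FCVTP*, FCVTM*, FCVTZ*, FCVTA*). The C++ below only sketches the source idioms such DAGs usually come from; whether a given build actually selects the fused form is not asserted here.

    #include <cmath>

    // Round-then-convert idioms that can match the FPToIntegerPats patterns above.
    long ceil_to_long(double x) {
      return static_cast<long>(std::ceil(x));      // candidate for fcvtps (round toward +inf)
    }
    unsigned trunc_to_u32(float x) {
      return static_cast<unsigned>(std::trunc(x)); // candidate for fcvtzu (round toward zero)
    }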
@@ -2466,14 +2516,7 @@ defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
(FRINTNDr FPR64:$Rn)>;
-// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
-// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
-// <rdar://problem/13715968>
-// TODO: We should really model the FPSR flags correctly. This is really ugly.
-let hasSideEffects = 1 in {
defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
-}
-
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
let SchedRW = [WriteFDiv] in {
@@ -2488,23 +2531,23 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
-defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>;
-defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>;
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
+defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>;
let SchedRW = [WriteFMul] in {
defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
-def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
//===----------------------------------------------------------------------===//
@@ -2556,7 +2599,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
//===----------------------------------------------------------------------===//
defm FCCMPE : FPCondComparison<1, "fccmpe">;
-defm FCCMP : FPCondComparison<0, "fccmp">;
+defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
@@ -2589,6 +2632,40 @@ defm FMOV : FPMoveImmediate<"fmov">;
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//
+defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
+ int_aarch64_neon_uabd>;
+// Match UABDL in log2-shuffle patterns.
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
+ (v8i16 (add (sub (zext (v8i8 V64:$opA)),
+ (zext (v8i8 V64:$opB))),
+ (AArch64vashr v8i16:$src, (i32 15))))),
+ (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
+ (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
+ (zext (extract_high_v16i8 V128:$opB))),
+ (AArch64vashr v8i16:$src, (i32 15))))),
+ (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
+def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
+ (v4i32 (add (sub (zext (v4i16 V64:$opA)),
+ (zext (v4i16 V64:$opB))),
+ (AArch64vashr v4i32:$src, (i32 31))))),
+ (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
+def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))),
+ (v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)),
+ (zext (extract_high_v8i16 V128:$opB))),
+ (AArch64vashr v4i32:$src, (i32 31))))),
+ (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
+def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
+ (v2i64 (add (sub (zext (v2i32 V64:$opA)),
+ (zext (v2i32 V64:$opB))),
+ (AArch64vashr v2i64:$src, (i32 63))))),
+ (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
+def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))),
+ (v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)),
+ (zext (extract_high_v4i32 V128:$opB))),
+ (AArch64vashr v2i64:$src, (i32 63))))),
+ (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
+
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))),
(v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))),
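The UABDL patterns above recognise the "log2-shuffle" DAG that vectorisation produces for widening absolute differences of unsigned elements: the all-ones/all-zeros mask from an arithmetic shift is added to the widened difference and then XORed back in, the branch-free |d| computation. A scalar sketch of the underlying loop (names illustrative; whether a particular loop vectorises to UABDL depends on the rest of the compilation):

    #include <cstdint>

    // Widening absolute difference of unsigned bytes: the kind of loop whose
    // vectorised form the UABDL patterns above are meant to catch.
    void abs_diff_u8(const uint8_t *a, const uint8_t *b, uint16_t *out, int n) {
      for (int i = 0; i < n; ++i) {
        int d = int(a[i]) - int(b[i]);       // widen before subtracting
        out[i] = uint16_t(d < 0 ? -d : d);   // |d| fits in 16 bits
      }
    }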
@@ -2780,29 +2857,29 @@ defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
-defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>;
-defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>;
-defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>;
-defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
-defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
-defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
-defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
-defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
-defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
-defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
-defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>;
-defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
-defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
-defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>;
+defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
+defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>;
+defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
+defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
+defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
+defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
+defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>;
+defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
+defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
+defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>;
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
+defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
-defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
+defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
// The following def pats catch the case where the LHS of an FMA is negated.
@@ -2816,11 +2893,11 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
(FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
-defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>;
-defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
-defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>;
-defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
+defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
+defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
+defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
@@ -2833,9 +2910,9 @@ defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
-defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>;
+defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
-defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>;
+defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
@@ -2852,9 +2929,9 @@ defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
-defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>;
+defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
-defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>;
+defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
@@ -2879,54 +2956,6 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
-def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)),
- (SMINv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)),
- (SMINv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)),
- (SMINv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)),
- (SMINv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)),
- (SMINv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)),
- (SMINv4i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)),
- (SMAXv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)),
- (SMAXv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)),
- (SMAXv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)),
- (SMAXv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)),
- (SMAXv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)),
- (SMAXv4i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)),
- (UMINv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)),
- (UMINv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)),
- (UMINv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)),
- (UMINv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)),
- (UMINv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)),
- (UMINv4i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)),
- (UMAXv8i8 V64:$Rn, V64:$Rm)>;
-def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)),
- (UMAXv4i16 V64:$Rn, V64:$Rm)>;
-def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)),
- (UMAXv2i32 V64:$Rn, V64:$Rm)>;
-def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)),
- (UMAXv16i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)),
- (UMAXv8i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)),
- (UMAXv4i32 V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
(BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
@@ -3052,6 +3081,14 @@ def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
"|cmlt.2d\t$dst, $src1, $src2}",
(CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|fcmle.4h\t$dst, $src1, $src2}",
+ (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|fcmle.8h\t$dst, $src1, $src2}",
+ (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
"|fcmle.2s\t$dst, $src1, $src2}",
(FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3062,6 +3099,14 @@ def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
"|fcmle.2d\t$dst, $src1, $src2}",
(FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|fcmlt.4h\t$dst, $src1, $src2}",
+ (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|fcmlt.8h\t$dst, $src1, $src2}",
+ (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
"|fcmlt.2s\t$dst, $src1, $src2}",
(FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3072,6 +3117,14 @@ def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
"|fcmlt.2d\t$dst, $src1, $src2}",
(FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|facle.4h\t$dst, $src1, $src2}",
+ (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|facle.8h\t$dst, $src1, $src2}",
+ (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
"|facle.2s\t$dst, $src1, $src2}",
(FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3082,6 +3135,14 @@ def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
"|facle.2d\t$dst, $src1, $src2}",
(FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|faclt.4h\t$dst, $src1, $src2}",
+ (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|faclt.8h\t$dst, $src1, $src2}",
+ (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
+}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
"|faclt.2s\t$dst, $src1, $src2}",
(FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
@@ -3103,19 +3164,19 @@ defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
-defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>;
+defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FABD64 FPR64:$Rn, FPR64:$Rm)>;
-defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge",
+defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
int_aarch64_neon_facge>;
-defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt",
+defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
int_aarch64_neon_facgt>;
-defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
-defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
-defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
-defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>;
-defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
+defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
+defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -3198,35 +3259,35 @@ defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
-defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
-defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
-defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
-defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
-defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
-defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">;
+defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
+defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
+defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
+defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
+defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
+defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
+defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
+defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">;
-defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">;
-defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
+defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
+defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
-defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>;
+defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
int_aarch64_neon_suqadd>;
-defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
+defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
@@ -3390,8 +3451,6 @@ defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
int_aarch64_neon_uabd>;
-defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- int_aarch64_neon_uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
@@ -3449,8 +3508,8 @@ defm : Neon_mulacc_widen_patterns<
// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
-def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
- (vector_extract (v2i64 V128:$Rm), (i64 1))),
+def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
+ (extractelt (v2i64 V128:$Rm), (i64 1))),
(PMULLv2i64 V128:$Rn, V128:$Rm)>;
// CodeGen patterns for addhn and subhn instructions, which can actually be
@@ -3593,11 +3652,11 @@ defm CPY : SIMDScalarCPY<"cpy">;
//----------------------------------------------------------------------------
defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">;
-defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;
-defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
-defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
-defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
-defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
+defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
+defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
+defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
+defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
+defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
@@ -3713,12 +3772,12 @@ defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
SDNodeXForm IdxXFORM> {
- def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn),
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
imm:$idx))))),
(DUP V128:$Rn, (IdxXFORM imm:$idx))>;
- def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn),
- imm:$idx))))),
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
+ imm:$idx))))),
(DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}
@@ -3747,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
(i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
+ VectorIndexB:$idx)))), i8),
+ (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
+ VectorIndexH:$idx)))), i16),
+ (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+
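A minimal C++ sketch of source these two SMOV patterns are meant to catch (the function name is illustrative, and the selection shown in the comment is the expected outcome, not a guarantee):

    #include <arm_neon.h>
    #include <cstdint>

    // Lane extract whose result is sign-extended all the way to 64 bits;
    // expected to select a single "smov x0, v0.b[3]" via SMOVvi8to64 instead
    // of a 32-bit extract followed by a separate sign-extend.
    int64_t lane_to_i64(int8x16_t v) {
      return vgetq_lane_s8(v, 3);
    }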
// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
@@ -3784,6 +3850,11 @@ def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
(v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(i64 FPR64:$Rn), dsub))>;
+def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
+ (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
+
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
@@ -3949,10 +4020,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
-defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
-defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
-defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
-defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
+defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
+defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
+defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
+defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
@@ -4199,15 +4270,23 @@ def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
// AdvSIMD FMOV
-def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8,
+def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
"fmov", ".2d",
[(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
-def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8,
+def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
"fmov", ".2s",
[(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
-def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8,
+def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
"fmov", ".4s",
[(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
+ "fmov", ".4h",
+ [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
+ "fmov", ".8h",
+ [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+} // Predicates = [HasNEON, HasFullFP16]
// AdvSIMD MOVI
@@ -4235,7 +4314,7 @@ def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128,
+def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
simdimmtype10,
"movi", ".2d",
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
@@ -4296,10 +4375,10 @@ def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
(AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
// Per byte: 8b & 16b
-def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255,
+def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
"movi", ".8b",
[(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
-def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255,
+def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
"movi", ".16b",
[(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
@@ -4340,8 +4419,8 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
//----------------------------------------------------------------------------
let hasSideEffects = 0 in {
- defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
- defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
+ defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
+ defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}
// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
@@ -4349,18 +4428,18 @@ let hasSideEffects = 0 in {
// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
@@ -4424,7 +4503,9 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
(FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
V128:$Rm, VectorIndexS:$idx)>;
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
- (vector_extract (v2f32 (fneg V64:$Rm)),
+ (vector_extract (v4f32 (insert_subvector undef,
+ (v2f32 (fneg V64:$Rm)),
+ (i32 0))),
VectorIndexS:$idx))),
(FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
@@ -4442,8 +4523,8 @@ defm : FMLSIndexedAfterNegPatterns<
defm : FMLSIndexedAfterNegPatterns<
TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
-defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
-defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;
+defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;
def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv2i32_indexed V64:$Rn,
@@ -4497,10 +4578,10 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
-defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
-defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
-defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
-defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
+defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
+defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
+defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
+defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
@@ -4573,7 +4654,7 @@ defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
//----------------------------------------------------------------------------
defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
-defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf",
+defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
int_aarch64_neon_rshrn>;
@@ -4608,7 +4689,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
-defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
+defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
@@ -5133,10 +5214,10 @@ def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
def : Pat<(i64 (anyext GPR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
-// When we need to explicitly zero-extend, we use an unsigned bitfield move
-// instruction (UBFM) on the enclosing super-reg.
+// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
+// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+ (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
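For illustration, a hedged C++ snippet that exercises the new zero-extension lowering (the function name is made up; the comment describes the expected selection under this pattern):

    #include <cstdint>

    // Expected to lower to a 32-bit register move (ORRWrs wrapped in a
    // SUBREG_TO_REG) rather than the previous UBFM-based zero-extend.
    uint64_t zext32(uint32_t x) {
      return x;
    }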
// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
@@ -5801,6 +5882,21 @@ def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
(v16i8 (REV16v16i8 FPR128:$src))>;
}
+def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
+ (EXTRACT_SUBREG V128:$Rn, dsub)>;
+
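A small sketch of source that maps onto these index-0 subvector extracts (standard NEON intrinsics; the function name is illustrative):

    #include <arm_neon.h>

    // Taking the low 64-bit half of a 128-bit vector; expected to become a
    // plain dsub subregister extract, i.e. no instruction beyond whatever the
    // register allocator needs.
    int16x4_t low_half(int16x8_t v) {
      return vget_low_s16(v);
    }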
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
(EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
@@ -5852,6 +5948,45 @@ def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+// Patterns for nontemporal/no-allocate stores.
+// We have to resort to tricks to turn a single-input store into a store pair,
+// because there is no single-input nontemporal store, only STNP.
+let Predicates = [IsLE] in {
+let AddedComplexity = 15 in {
+class NTStore128Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR128:$Rt),
+ (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+ (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
+ (CPYi64 FPR128:$Rt, (i64 1)),
+ GPR64sp:$Rn, simm7s8:$offset)>;
+
+def : NTStore128Pat<v2i64>;
+def : NTStore128Pat<v4i32>;
+def : NTStore128Pat<v8i16>;
+def : NTStore128Pat<v16i8>;
+
+class NTStore64Pat<ValueType VT> :
+ Pat<(nontemporalstore (VT FPR64:$Rt),
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
+ (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+ GPR64sp:$Rn, simm7s4:$offset)>;
+
+// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
+def : NTStore64Pat<v1f64>;
+def : NTStore64Pat<v1i64>;
+def : NTStore64Pat<v2i32>;
+def : NTStore64Pat<v4i16>;
+def : NTStore64Pat<v8i8>;
+
+def : Pat<(nontemporalstore GPR64:$Rt,
+ (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+ (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32),
+ GPR64sp:$Rn, simm7s4:$offset)>;
+} // AddedComplexity=15
+} // Predicates = [IsLE]
+
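A hedged sketch of source that should reach the 128-bit pattern above, assuming Clang's __builtin_nontemporal_store and a little-endian AArch64 target (names are illustrative):

    #include <arm_neon.h>

    // A single nontemporal 128-bit store; expected to be split by
    // NTStore128Pat into an STNP of the two 64-bit halves.
    void nt_store_q(int32x4_t v, int32x4_t *p) {
      __builtin_nontemporal_store(v, p);
    }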
// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 82f77a7..566aa2c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -41,54 +41,85 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
+STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
+STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
+STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
cl::init(20), cl::Hidden);
-// Place holder while testing unscaled load/store combining
-static cl::opt<bool> EnableAArch64UnscaledMemOp(
- "aarch64-unscaled-mem-op", cl::Hidden,
- cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
+namespace llvm {
+void initializeAArch64LoadStoreOptPass(PassRegistry &);
+}
+
+#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
namespace {
+
+typedef struct LdStPairFlags {
+ // If a matching instruction is found, MergeForward is set to true if the
+ // merge is to remove the first instruction and replace the second with
+ // a pair-wise insn, and false if the reverse is true.
+ bool MergeForward;
+
+ // SExtIdx gives the index of the result of the load pair that must be
+ // extended. The value of SExtIdx assumes that the paired load produces the
+ // value in this order: (I, returned iterator), i.e., -1 means no value has
+ // to be extended, 0 means I, and 1 means the returned iterator.
+ int SExtIdx;
+
+ LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}
+
+ void setMergeForward(bool V = true) { MergeForward = V; }
+ bool getMergeForward() const { return MergeForward; }
+
+ void setSExtIdx(int V) { SExtIdx = V; }
+ int getSExtIdx() const { return SExtIdx; }
+
+} LdStPairFlags;
+
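A simplified sketch of the call shape the flags object enables, replacing the old MergeForward/SExtIdx out-parameters (the surrounding iterator and limit variables are assumptions, not the literal pass code):

    LdStPairFlags Flags;
    MachineBasicBlock::iterator Paired =
        findMatchingInsn(MBBI, Flags, ScanLimit);
    if (Paired != MBB.end())
      MBBI = mergePairedInsns(MBBI, Paired, Flags);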
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
- AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
+ AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const AArch64Subtarget *Subtarget;
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
- // If a matching instruction is found, MergeForward is set to true if the
- // merge is to remove the first instruction and replace the second with
- // a pair-wise insn, and false if the reverse is true.
- // \p SExtIdx[out] gives the index of the result of the load pair that
- // must be extended. The value of SExtIdx assumes that the paired load
- // produces the value in this order: (I, returned iterator), i.e.,
- // -1 means no value has to be extended, 0 means I, and 1 means the
- // returned iterator.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, int &SExtIdx,
+ LdStPairFlags &Flags,
unsigned Limit);
+
+ // Scan the instructions looking for a store that writes to the address from
+ // which the current load instruction reads. Return true if one is found.
+ bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
+ MachineBasicBlock::iterator &StoreI);
+
// Merge the two instructions indicated into a single pair-wise instruction.
// If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
- // \p SExtIdx index of the result that must be extended for a paired load.
- // -1 means none, 0 means I, and 1 means Paired.
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool MergeForward,
- int SExtIdx);
+ MachineBasicBlock::iterator Paired,
+ const LdStPairFlags &Flags);
+
+ // Promote the load that reads directly from the address stored to.
+ MachineBasicBlock::iterator
+ promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
+ MachineBasicBlock::iterator StoreI);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
// pre or post indexed addressing with writeback. Scan forwards.
MachineBasicBlock::iterator
findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
- int Value);
+ int UnscaledOffset);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
@@ -96,97 +127,177 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
MachineBasicBlock::iterator
findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
- // Merge a pre-index base register update into a ld/st instruction.
- MachineBasicBlock::iterator
- mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update);
+ // Find an instruction that updates the base register of the ld/st
+ // instruction.
+ bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI,
+ unsigned BaseReg, int Offset);
- // Merge a post-index base register update into a ld/st instruction.
+ // Merge a pre- or post-index base register update into a ld/st instruction.
MachineBasicBlock::iterator
- mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update);
+ mergeUpdateInsn(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Update, bool IsPreIdx);
+
+ // Find and merge foldable ldr/str instructions.
+ bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
- bool optimizeBlock(MachineBasicBlock &MBB);
+ // Find and promote load instructions which read directly from store.
+ bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
+
+ // Check if converting two narrow loads into a single wider load with
+ // bitfield extracts could be enabled.
+ bool enableNarrowLdMerge(MachineFunction &Fn);
+
+ bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "AArch64 load / store optimization pass";
+ return AARCH64_LOAD_STORE_OPT_NAME;
}
-
-private:
- int getMemSize(MachineInstr *MemMI);
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace
-static bool isUnscaledLdst(unsigned Opc) {
+INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
+ AARCH64_LOAD_STORE_OPT_NAME, false, false)
+
+static bool isUnscaledLdSt(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64::STURSi:
- return true;
case AArch64::STURDi:
- return true;
case AArch64::STURQi:
- return true;
+ case AArch64::STURBBi:
+ case AArch64::STURHHi:
case AArch64::STURWi:
- return true;
case AArch64::STURXi:
- return true;
case AArch64::LDURSi:
- return true;
case AArch64::LDURDi:
- return true;
case AArch64::LDURQi:
- return true;
case AArch64::LDURWi:
- return true;
case AArch64::LDURXi:
- return true;
case AArch64::LDURSWi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
return true;
}
}
-// Size in bytes of the data moved by an unscaled load or store
-int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
- switch (MemMI->getOpcode()) {
+static bool isUnscaledLdSt(MachineInstr *MI) {
+ return isUnscaledLdSt(MI->getOpcode());
+}
+
+static unsigned getBitExtrOpcode(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode.");
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ return AArch64::UBFMWri;
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ return AArch64::SBFMWri;
+ }
+}
+
+static bool isNarrowStore(unsigned Opc) {
+ switch (Opc) {
default:
- llvm_unreachable("Opcode has unknown size!");
+ return false;
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
+ return true;
+ }
+}
+
+static bool isNarrowStore(MachineInstr *MI) {
+ return isNarrowStore(MI->getOpcode());
+}
+
+static bool isNarrowLoad(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ return true;
+ }
+}
+
+static bool isNarrowLoad(MachineInstr *MI) {
+ return isNarrowLoad(MI->getOpcode());
+}
+
+// Scaling factor for unscaled load or store.
+static int getMemScale(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Opcode has unknown scale!");
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
+ case AArch64::LDRSBWui:
+ case AArch64::LDURSBWi:
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ return 1;
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRSHWui:
+ case AArch64::LDURSHWi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
+ return 2;
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ case AArch64::LDRSWui:
+ case AArch64::LDURSWi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
case AArch64::STRSui:
case AArch64::STURSi:
- return 4;
- case AArch64::STRDui:
- case AArch64::STURDi:
- return 8;
- case AArch64::STRQui:
- case AArch64::STURQi:
- return 16;
case AArch64::STRWui:
case AArch64::STURWi:
- return 4;
- case AArch64::STRXui:
- case AArch64::STURXi:
- return 8;
- case AArch64::LDRSui:
- case AArch64::LDURSi:
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPWi:
+ case AArch64::STPSi:
+ case AArch64::STPWi:
return 4;
case AArch64::LDRDui:
case AArch64::LDURDi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ case AArch64::LDPDi:
+ case AArch64::LDPXi:
+ case AArch64::STPDi:
+ case AArch64::STPXi:
return 8;
case AArch64::LDRQui:
case AArch64::LDURQi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ case AArch64::LDPQi:
+ case AArch64::STPQi:
return 16;
- case AArch64::LDRWui:
- case AArch64::LDURWi:
- return 4;
- case AArch64::LDRXui:
- case AArch64::LDURXi:
- return 8;
- case AArch64::LDRSWui:
- case AArch64::LDURSWi:
- return 4;
}
}
@@ -203,6 +314,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::STURDi:
case AArch64::STRQui:
case AArch64::STURQi:
+ case AArch64::STRBBui:
+ case AArch64::STURBBi:
+ case AArch64::STRHHui:
+ case AArch64::STURHHi:
case AArch64::STRWui:
case AArch64::STURWi:
case AArch64::STRXui:
@@ -219,11 +334,23 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::STURSi:
case AArch64::LDRSui:
case AArch64::LDURSi:
+ case AArch64::LDRHHui:
+ case AArch64::LDURHHi:
+ case AArch64::LDRBBui:
+ case AArch64::LDURBBi:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
+ case AArch64::LDRSBWui:
+ return AArch64::LDRBBui;
+ case AArch64::LDRSHWui:
+ return AArch64::LDRHHui;
+ case AArch64::LDURSBWi:
+ return AArch64::LDURBBi;
+ case AArch64::LDURSHWi:
+ return AArch64::LDURHHi;
}
}
@@ -240,6 +367,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::STRQui:
case AArch64::STURQi:
return AArch64::STPQi;
+ case AArch64::STRBBui:
+ return AArch64::STRHHui;
+ case AArch64::STRHHui:
+ return AArch64::STRWui;
+ case AArch64::STURBBi:
+ return AArch64::STURHHi;
+ case AArch64::STURHHi:
+ return AArch64::STURWi;
case AArch64::STRWui:
case AArch64::STURWi:
return AArch64::STPWi;
@@ -264,6 +399,48 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return AArch64::LDPSWi;
+ case AArch64::LDRHHui:
+ case AArch64::LDRSHWui:
+ return AArch64::LDRWui;
+ case AArch64::LDURHHi:
+ case AArch64::LDURSHWi:
+ return AArch64::LDURWi;
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBWui:
+ return AArch64::LDRHHui;
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ return AArch64::LDURHHi;
+ }
+}
+
+static unsigned isMatchingStore(MachineInstr *LoadInst,
+ MachineInstr *StoreInst) {
+ unsigned LdOpc = LoadInst->getOpcode();
+ unsigned StOpc = StoreInst->getOpcode();
+ switch (LdOpc) {
+ default:
+ llvm_unreachable("Unsupported load instruction!");
+ case AArch64::LDRBBui:
+ return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui ||
+ StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
+ case AArch64::LDURBBi:
+ return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi ||
+ StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
+ case AArch64::LDRHHui:
+ return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui ||
+ StOpc == AArch64::STRXui;
+ case AArch64::LDURHHi:
+ return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi ||
+ StOpc == AArch64::STURXi;
+ case AArch64::LDRWui:
+ return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui;
+ case AArch64::LDURWi:
+ return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi;
+ case AArch64::LDRXui:
+ return StOpc == AArch64::STRXui;
+ case AArch64::LDURXi:
+ return StOpc == AArch64::STURXi;
}
}
@@ -277,6 +454,10 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::STRDpre;
case AArch64::STRQui:
return AArch64::STRQpre;
+ case AArch64::STRBBui:
+ return AArch64::STRBBpre;
+ case AArch64::STRHHui:
+ return AArch64::STRHHpre;
case AArch64::STRWui:
return AArch64::STRWpre;
case AArch64::STRXui:
@@ -287,12 +468,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::LDRDpre;
case AArch64::LDRQui:
return AArch64::LDRQpre;
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBpre;
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHpre;
case AArch64::LDRWui:
return AArch64::LDRWpre;
case AArch64::LDRXui:
return AArch64::LDRXpre;
case AArch64::LDRSWui:
return AArch64::LDRSWpre;
+ case AArch64::LDPSi:
+ return AArch64::LDPSpre;
+ case AArch64::LDPSWi:
+ return AArch64::LDPSWpre;
+ case AArch64::LDPDi:
+ return AArch64::LDPDpre;
+ case AArch64::LDPQi:
+ return AArch64::LDPQpre;
+ case AArch64::LDPWi:
+ return AArch64::LDPWpre;
+ case AArch64::LDPXi:
+ return AArch64::LDPXpre;
+ case AArch64::STPSi:
+ return AArch64::STPSpre;
+ case AArch64::STPDi:
+ return AArch64::STPDpre;
+ case AArch64::STPQi:
+ return AArch64::STPQpre;
+ case AArch64::STPWi:
+ return AArch64::STPWpre;
+ case AArch64::STPXi:
+ return AArch64::STPXpre;
}
}
@@ -306,6 +513,10 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::STRDpost;
case AArch64::STRQui:
return AArch64::STRQpost;
+ case AArch64::STRBBui:
+ return AArch64::STRBBpost;
+ case AArch64::STRHHui:
+ return AArch64::STRHHpost;
case AArch64::STRWui:
return AArch64::STRWpost;
case AArch64::STRXui:
@@ -316,19 +527,111 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::LDRDpost;
case AArch64::LDRQui:
return AArch64::LDRQpost;
+ case AArch64::LDRBBui:
+ return AArch64::LDRBBpost;
+ case AArch64::LDRHHui:
+ return AArch64::LDRHHpost;
case AArch64::LDRWui:
return AArch64::LDRWpost;
case AArch64::LDRXui:
return AArch64::LDRXpost;
case AArch64::LDRSWui:
return AArch64::LDRSWpost;
+ case AArch64::LDPSi:
+ return AArch64::LDPSpost;
+ case AArch64::LDPSWi:
+ return AArch64::LDPSWpost;
+ case AArch64::LDPDi:
+ return AArch64::LDPDpost;
+ case AArch64::LDPQi:
+ return AArch64::LDPQpost;
+ case AArch64::LDPWi:
+ return AArch64::LDPWpost;
+ case AArch64::LDPXi:
+ return AArch64::LDPXpost;
+ case AArch64::STPSi:
+ return AArch64::STPSpost;
+ case AArch64::STPDi:
+ return AArch64::STPDpost;
+ case AArch64::STPQi:
+ return AArch64::STPQpost;
+ case AArch64::STPWi:
+ return AArch64::STPWpost;
+ case AArch64::STPXi:
+ return AArch64::STPXpost;
}
}
+static bool isPairedLdSt(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi:
+ return true;
+ }
+}
+
+static const MachineOperand &getLdStRegOp(const MachineInstr *MI,
+ unsigned PairedRegOp = 0) {
+ assert(PairedRegOp < 2 && "Unexpected register operand idx.");
+ unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0;
+ return MI->getOperand(Idx);
+}
+
+static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) {
+ unsigned Idx = isPairedLdSt(MI) ? 2 : 1;
+ return MI->getOperand(Idx);
+}
+
+static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) {
+ unsigned Idx = isPairedLdSt(MI) ? 3 : 2;
+ return MI->getOperand(Idx);
+}
+
+static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst,
+ MachineInstr *StoreInst) {
+ assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
+ int LoadSize = getMemScale(LoadInst);
+ int StoreSize = getMemScale(StoreInst);
+ int UnscaledStOffset = isUnscaledLdSt(StoreInst)
+ ? getLdStOffsetOp(StoreInst).getImm()
+ : getLdStOffsetOp(StoreInst).getImm() * StoreSize;
+ int UnscaledLdOffset = isUnscaledLdSt(LoadInst)
+ ? getLdStOffsetOp(LoadInst).getImm()
+ : getLdStOffsetOp(LoadInst).getImm() * LoadSize;
+ return (UnscaledStOffset <= UnscaledLdOffset) &&
+ (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
+}
+
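A worked instance of the containment check above, with illustrative offsets:

    // A 32-bit STRWui at scaled offset #1 writes unscaled bytes [4, 8); an
    // LDRHHui at scaled offset #3 reads unscaled bytes [6, 8). Since
    // 4 <= 6 and 6 + 2 <= 4 + 4, isLdOffsetInRangeOfSt returns true.
    static_assert(4 <= 6 && 6 + 2 <= 4 + 4, "load bytes lie inside store bytes");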
+// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI.
+static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
+ MachineInstr *Op1) {
+ assert(MI->memoperands_empty() && "expected a new machineinstr");
+ size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) +
+ (Op1->memoperands_end() - Op1->memoperands_begin());
+
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
+ MachineSDNode::mmo_iterator MemEnd =
+ std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
+ MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
+ MI->setMemRefs(MemBegin, MemEnd);
+}
+
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool MergeForward, int SExtIdx) {
+ const LdStPairFlags &Flags) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -338,25 +641,26 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (NextI == Paired)
++NextI;
+ int SExtIdx = Flags.getSExtIdx();
unsigned Opc =
SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode());
- bool IsUnscaled = isUnscaledLdst(Opc);
- int OffsetStride =
- IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
+ bool IsUnscaled = isUnscaledLdSt(Opc);
+ int OffsetStride = IsUnscaled ? getMemScale(I) : 1;
+ bool MergeForward = Flags.getMergeForward();
unsigned NewOpc = getMatchingPairOpcode(Opc);
// Insert our new paired instruction after whichever of the paired
// instructions MergeForward indicates.
MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
- MachineOperand &BaseRegOp =
- MergeForward ? Paired->getOperand(1) : I->getOperand(1);
+ const MachineOperand &BaseRegOp =
+ MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
- if (I->getOperand(2).getImm() ==
- Paired->getOperand(2).getImm() + OffsetStride) {
+ if (getLdStOffsetOp(I).getImm() ==
+ getLdStOffsetOp(Paired).getImm() + OffsetStride) {
RtMI = Paired;
Rt2MI = I;
// Here we swapped the assumption made for SExtIdx.
@@ -368,18 +672,135 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
RtMI = I;
Rt2MI = Paired;
}
- // Handle Unscaled
- int OffsetImm = RtMI->getOperand(2).getImm();
- if (IsUnscaled && EnableAArch64UnscaledMemOp)
- OffsetImm /= OffsetStride;
+
+ int OffsetImm = getLdStOffsetOp(RtMI).getImm();
+
+ if (isNarrowLoad(Opc)) {
+ // Change the scaled offset from small to large type.
+ if (!IsUnscaled) {
+ assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
+ OffsetImm /= 2;
+ }
+ MachineInstr *RtNewDest = MergeForward ? I : Paired;
+ // When merging small (< 32 bit) loads for big-endian targets, the order of
+ // the component parts gets swapped.
+ if (!Subtarget->isLittleEndian())
+ std::swap(RtMI, Rt2MI);
+ // Construct the new load instruction.
+ MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
+ NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(NewOpc))
+ .addOperand(getLdStRegOp(RtNewDest))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm);
+
+ // Copy MachineMemOperands from the original loads.
+ concatenateMemOperands(NewMemMI, I, Paired);
+
+ DEBUG(
+ dbgs()
+ << "Creating the new load and extract. Replacing instructions:\n ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(Paired->print(dbgs()));
+ DEBUG(dbgs() << " with instructions:\n ");
+ DEBUG((NewMemMI)->print(dbgs()));
+
+ int Width = getMemScale(I) == 1 ? 8 : 16;
+ int LSBLow = 0;
+ int LSBHigh = Width;
+ int ImmsLow = LSBLow + Width - 1;
+ int ImmsHigh = LSBHigh + Width - 1;
+ MachineInstr *ExtDestMI = MergeForward ? Paired : I;
+ if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
+ // Create the bitfield extract for high bits.
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(Rt2MI)))
+ .addOperand(getLdStRegOp(Rt2MI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
+ // Create the bitfield extract for low bits.
+ if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+ // For unsigned, prefer to use AND for low bits.
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(ImmsLow);
+ } else {
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(RtMI)))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
+ }
+ } else {
+ // Create the bitfield extract for low bits.
+ if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
+ // For unsigned, prefer to use AND for low bits.
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(AArch64::ANDWri))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(ImmsLow);
+ } else {
+ BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(RtMI)))
+ .addOperand(getLdStRegOp(RtMI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBLow)
+ .addImm(ImmsLow);
+ }
+
+ // Create the bitfield extract for high bits.
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(getBitExtrOpcode(Rt2MI)))
+ .addOperand(getLdStRegOp(Rt2MI))
+ .addReg(getLdStRegOp(RtNewDest).getReg())
+ .addImm(LSBHigh)
+ .addImm(ImmsHigh);
+ }
+ DEBUG(dbgs() << " ");
+ DEBUG((BitExtMI1)->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG((BitExtMI2)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+
+ // Erase the old instructions.
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+ return NextI;
+ }
// Construct the new instruction.
- MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint,
- I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(RtMI->getOperand(0))
- .addOperand(Rt2MI->getOperand(0))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm);
+ MachineInstrBuilder MIB;
+ if (isNarrowStore(Opc)) {
+ // Change the scaled offset from small to large type.
+ if (!IsUnscaled) {
+ assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
+ OffsetImm /= 2;
+ }
+ MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(NewOpc))
+ .addOperand(getLdStRegOp(I))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm);
+ // Copy MachineMemOperands from the original stores.
+ concatenateMemOperands(MIB, I, Paired);
+ } else {
+ // Handle Unscaled
+ if (IsUnscaled)
+ OffsetImm /= OffsetStride;
+ MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
+ TII->get(NewOpc))
+ .addOperand(getLdStRegOp(RtMI))
+ .addOperand(getLdStRegOp(Rt2MI))
+ .addOperand(BaseRegOp)
+ .addImm(OffsetImm);
+ }
+
(void)MIB;
// FIXME: Do we need/want to copy the mem operands from the source
@@ -439,13 +860,112 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
return NextI;
}
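To make the narrow-load branch earlier in mergePairedInsns concrete, a worked little-endian example with two adjacent unsigned byte loads (register numbers are illustrative and the exact destination of the wide load depends on the merge direction):

    //   ldrb w8, [x0]        ; low byte
    //   ldrb w9, [x0, #1]    ; high byte
    // is expected to become something like
    //   ldrh w9, [x0]        ; wide load into one of the original registers
    //   and  w8, w9, #0xff   ; unsigned low half, ANDWri with ImmsLow = 7
    //   ubfx w9, w9, #8, #8  ; high half, UBFMWri with immr = 8, imms = 15
    static_assert(/*LSBHigh*/ 8 + /*Width*/ 8 - 1 == 15, "ImmsHigh for byte loads");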
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
+ MachineBasicBlock::iterator StoreI) {
+ MachineBasicBlock::iterator NextI = LoadI;
+ ++NextI;
+
+ int LoadSize = getMemScale(LoadI);
+ int StoreSize = getMemScale(StoreI);
+ unsigned LdRt = getLdStRegOp(LoadI).getReg();
+ unsigned StRt = getLdStRegOp(StoreI).getReg();
+ bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt);
+
+ assert((IsStoreXReg ||
+ TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) &&
+ "Unexpected RegClass");
+
+ MachineInstr *BitExtMI;
+ if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) {
+    // Remove the load if its destination register is the same register that
+    // holds the stored value.
+ if (StRt == LdRt && LoadSize == 8) {
+ DEBUG(dbgs() << "Remove load instruction:\n ");
+ DEBUG(LoadI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ LoadI->eraseFromParent();
+ return NextI;
+ }
+    // Replace the load with a mov if the load and store are the same size.
+ BitExtMI =
+ BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
+ TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt)
+ .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR)
+ .addReg(StRt)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ // FIXME: Currently we disable this transformation in big-endian targets as
+ // performance and correctness are verified only in little-endian.
+ if (!Subtarget->isLittleEndian())
+ return NextI;
+ bool IsUnscaled = isUnscaledLdSt(LoadI);
+ assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match");
+ assert(LoadSize <= StoreSize && "Invalid load size");
+ int UnscaledLdOffset = IsUnscaled
+ ? getLdStOffsetOp(LoadI).getImm()
+ : getLdStOffsetOp(LoadI).getImm() * LoadSize;
+ int UnscaledStOffset = IsUnscaled
+ ? getLdStOffsetOp(StoreI).getImm()
+ : getLdStOffsetOp(StoreI).getImm() * StoreSize;
+ int Width = LoadSize * 8;
+ int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
+ int Imms = Immr + Width - 1;
+ unsigned DestReg = IsStoreXReg
+ ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32,
+ &AArch64::GPR64RegClass)
+ : LdRt;
+
+ assert((UnscaledLdOffset >= UnscaledStOffset &&
+ (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) &&
+ "Invalid offset");
+
+ Immr = 8 * (UnscaledLdOffset - UnscaledStOffset);
+ Imms = Immr + Width - 1;
+ if (UnscaledLdOffset == UnscaledStOffset) {
+ uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N
+ | ((Immr) << 6) // immr
+ | ((Imms) << 0) // imms
+ ;
+
+ BitExtMI =
+ BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
+ TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri),
+ DestReg)
+ .addReg(StRt)
+ .addImm(AndMaskEncoded);
+ } else {
+ BitExtMI =
+ BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(),
+ TII->get(IsStoreXReg ? AArch64::UBFMXri : AArch64::UBFMWri),
+ DestReg)
+ .addReg(StRt)
+ .addImm(Immr)
+ .addImm(Imms);
+ }
+ }
+
+ DEBUG(dbgs() << "Promoting load by replacing :\n ");
+ DEBUG(StoreI->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG(LoadI->print(dbgs()));
+ DEBUG(dbgs() << " with instructions:\n ");
+ DEBUG(StoreI->print(dbgs()));
+ DEBUG(dbgs() << " ");
+ DEBUG((BitExtMI)->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+
+ // Erase the old instructions.
+ LoadI->eraseFromParent();
+ return NextI;
+}
+
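A worked instance of the Immr/Imms arithmetic above (illustrative registers and offsets, little-endian):

    //   str  w2, [x0, #4]    ; STRWui, writes bytes [4, 8)
    //   ldrh w1, [x0, #6]    ; LDRHHui, reads bytes [6, 8)
    // Width = 2 * 8 = 16, Immr = 8 * (6 - 4) = 16, Imms = 16 + 16 - 1 = 31, so
    // the load is expected to be replaced by "ubfx w1, w2, #16, #16" (UBFMWri).
    static_assert(8 * (6 - 4) == 16 && 16 + 16 - 1 == 31,
                  "immr/imms for this example");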
/// trackRegDefsUses - Remember what registers the specified instruction uses
/// and modifies.
-static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
+static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs,
BitVector &UsedRegs,
const TargetRegisterInfo *TRI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask())
ModifiedRegs.setBitsNotInMask(MO.getRegMask());
@@ -464,16 +984,12 @@ static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
}
static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
- if (!IsUnscaled && (Offset > 63 || Offset < -64))
- return false;
- if (IsUnscaled) {
- // Convert the byte-offset used by unscaled into an "element" offset used
- // by the scaled pair load/store instructions.
- int ElemOffset = Offset / OffsetStride;
- if (ElemOffset > 63 || ElemOffset < -64)
- return false;
- }
- return true;
+ // Convert the byte-offset used by unscaled into an "element" offset used
+ // by the scaled pair load/store instructions.
+ if (IsUnscaled)
+ Offset /= OffsetStride;
+
+ return Offset <= 63 && Offset >= -64;
}
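For reference, the element-offset bound worked through on an example (purely illustrative):

    // An unscaled 8-byte access (e.g. STURXi) at byte offset 504 maps to
    // element offset 504 / 8 = 63, the largest value the 7-bit signed, scaled
    // pair offset can encode; byte offset 512 maps to 64 and is rejected.
    static_assert(504 / 8 == 63 && 512 / 8 == 64,
                  "7-bit signed element offset bound");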
// Do alignment, specialized to power of 2 and for signed ints,
@@ -507,12 +1023,65 @@ static bool mayAlias(MachineInstr *MIa,
return false;
}
+bool AArch64LoadStoreOpt::findMatchingStore(
+ MachineBasicBlock::iterator I, unsigned Limit,
+ MachineBasicBlock::iterator &StoreI) {
+ MachineBasicBlock::iterator E = I->getParent()->begin();
+ MachineBasicBlock::iterator MBBI = I;
+ MachineInstr *FirstMI = I;
+ unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
+
+ // Track which registers have been modified and used between the first insn
+ // and the second insn.
+ BitVector ModifiedRegs, UsedRegs;
+ ModifiedRegs.resize(TRI->getNumRegs());
+ UsedRegs.resize(TRI->getNumRegs());
+
+ for (unsigned Count = 0; MBBI != E && Count < Limit;) {
+ --MBBI;
+ MachineInstr *MI = MBBI;
+ // Skip DBG_VALUE instructions. Otherwise debug info can affect the
+ // optimization by changing how far we scan.
+ if (MI->isDebugValue())
+ continue;
+ // Now that we know this is a real instruction, count it.
+ ++Count;
+
+ // If the load instruction reads directly from the address to which the
+ // store instruction writes and the stored value is not modified, we can
+ // promote the load. Since we do not handle stores with pre-/post-index,
+ // it's unnecessary to check if BaseReg is modified by the store itself.
+ if (MI->mayStore() && isMatchingStore(FirstMI, MI) &&
+ BaseReg == getLdStBaseOp(MI).getReg() &&
+ isLdOffsetInRangeOfSt(FirstMI, MI) &&
+ !ModifiedRegs[getLdStRegOp(MI).getReg()]) {
+ StoreI = MBBI;
+ return true;
+ }
+
+ if (MI->isCall())
+ return false;
+
+ // Update modified / uses register lists.
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+
+ // Otherwise, if the base register is modified, we have no match, so
+ // return early.
+ if (ModifiedRegs[BaseReg])
+ return false;
+
+ // If we encounter a store aliased with the load, return early.
+ if (MI->mayStore() && mayAlias(FirstMI, MI, TII))
+ return false;
+ }
+ return false;
+}
+
/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &MergeForward, int &SExtIdx,
- unsigned Limit) {
+ LdStPairFlags &Flags, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
@@ -520,21 +1089,27 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
unsigned Opc = FirstMI->getOpcode();
bool MayLoad = FirstMI->mayLoad();
- bool IsUnscaled = isUnscaledLdst(Opc);
- unsigned Reg = FirstMI->getOperand(0).getReg();
- unsigned BaseReg = FirstMI->getOperand(1).getReg();
- int Offset = FirstMI->getOperand(2).getImm();
+ bool IsUnscaled = isUnscaledLdSt(FirstMI);
+ unsigned Reg = getLdStRegOp(FirstMI).getReg();
+ unsigned BaseReg = getLdStBaseOp(FirstMI).getReg();
+ int Offset = getLdStOffsetOp(FirstMI).getImm();
+ bool IsNarrowStore = isNarrowStore(Opc);
+
+ // For narrow stores, find only the case where the stored value is WZR.
+ if (IsNarrowStore && Reg != AArch64::WZR)
+ return E;
// Early exit if the first instruction modifies the base register.
// e.g., ldr x0, [x0]
- // Early exit if the offset if not possible to match. (6 bits of positive
- // range, plus allow an extra one in case we find a later insn that matches
- // with Offset-1
if (FirstMI->modifiesRegister(BaseReg, TRI))
return E;
- int OffsetStride =
- IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
- if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
+
+  // Early exit if the offset is not possible to match. (6 bits of positive
+ // range, plus allow an extra one in case we find a later insn that matches
+ // with Offset-1)
+ int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1;
+ if (!(isNarrowLoad(Opc) || IsNarrowStore) &&
+ !inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return E;
// Track which registers have been modified and used between the first insn
@@ -557,18 +1132,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
++Count;
bool CanMergeOpc = Opc == MI->getOpcode();
- SExtIdx = -1;
+ Flags.setSExtIdx(-1);
if (!CanMergeOpc) {
bool IsValidLdStrOpc;
unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc);
- if (!IsValidLdStrOpc)
- continue;
+ assert(IsValidLdStrOpc &&
+ "Given Opc should be a Load or Store with an immediate");
// Opc will be the first instruction in the pair.
- SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0;
+ Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0);
CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode());
}
- if (CanMergeOpc && MI->getOperand(2).isImm()) {
+ if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) {
+ assert(MI->mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
// These instructions all have scaled immediate operands, so we just
@@ -579,8 +1155,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Pairwise instructions have a 7-bit signed offset field. Single insns
// have a 12-bit unsigned offset field. To be a valid combine, the
// final offset must be in range.
- unsigned MIBaseReg = MI->getOperand(1).getReg();
- int MIOffset = MI->getOperand(2).getImm();
+ unsigned MIBaseReg = getLdStBaseOp(MI).getReg();
+ int MIOffset = getLdStOffsetOp(MI).getImm();
if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) ||
(Offset + OffsetStride == MIOffset))) {
int MinOffset = Offset < MIOffset ? Offset : MIOffset;
@@ -591,30 +1167,43 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
return E;
// If the resultant immediate offset of merging these instructions
// is out of range for a pairwise instruction, bail and keep looking.
- bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode());
- if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
+ bool MIIsUnscaled = isUnscaledLdSt(MI);
+ bool IsNarrowLoad = isNarrowLoad(MI->getOpcode());
+ if (!IsNarrowLoad &&
+ !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ MemInsns.push_back(MI);
continue;
}
- // If the alignment requirements of the paired (scaled) instruction
- // can't express the offset of the unscaled input, bail and keep
- // looking.
- if (IsUnscaled && EnableAArch64UnscaledMemOp &&
- (alignTo(MinOffset, OffsetStride) != MinOffset)) {
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
+
+ if (IsNarrowLoad || IsNarrowStore) {
+ // If the alignment requirements of the scaled wide load/store
+ // instruction can't express the offset of the scaled narrow
+ // input, bail and keep looking.
+ if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) {
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
MemInsns.push_back(MI);
- continue;
+ continue;
+ }
+ } else {
+ // If the alignment requirements of the paired (scaled) instruction
+ // can't express the offset of the unscaled input, bail and keep
+ // looking.
+ if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) {
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ MemInsns.push_back(MI);
+ continue;
+ }
}
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- if (MayLoad && Reg == MI->getOperand(0).getReg()) {
+ // For narrow stores, allow only when the stored value is the same
+ // (i.e., WZR).
+ if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) ||
+ (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
- if (MI->mayLoadOrStore())
- MemInsns.push_back(MI);
+ MemInsns.push_back(MI);
continue;
}
@@ -622,10 +1211,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// the two instructions and none of the instructions between the second
// and first alias with the second, we can combine the second into the
// first.
- if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
- !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) &&
+ if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
+ !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
!mayAlias(MI, MemInsns, TII)) {
- MergeForward = false;
+ Flags.setMergeForward(false);
return MBBI;
}
@@ -633,11 +1222,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
- !(FirstMI->mayLoad() &&
- UsedRegs[FirstMI->getOperand(0).getReg()]) &&
+ if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
+ !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
!mayAlias(FirstMI, MemInsns, TII)) {
- MergeForward = true;
+ Flags.setMergeForward(true);
return MBBI;
}
// Unable to combine these instructions due to interference in between.
@@ -666,51 +1254,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
}
MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update) {
- assert((Update->getOpcode() == AArch64::ADDXri ||
- Update->getOpcode() == AArch64::SUBXri) &&
- "Unexpected base register update instruction to merge!");
- MachineBasicBlock::iterator NextI = I;
- // Return the instruction following the merged instruction, which is
- // the instruction following our unmerged load. Unless that's the add/sub
- // instruction we're merging, in which case it's the one after that.
- if (++NextI == Update)
- ++NextI;
-
- int Value = Update->getOperand(2).getImm();
- assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
- "Can't merge 1 << 12 offset into pre-indexed load / store");
- if (Update->getOpcode() == AArch64::SUBXri)
- Value = -Value;
-
- unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
- MachineInstrBuilder MIB =
- BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(Update->getOperand(0))
- .addOperand(I->getOperand(0))
- .addOperand(I->getOperand(1))
- .addImm(Value);
- (void)MIB;
-
- DEBUG(dbgs() << "Creating pre-indexed load/store.");
- DEBUG(dbgs() << " Replacing instructions:\n ");
- DEBUG(I->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG(Update->print(dbgs()));
- DEBUG(dbgs() << " with instruction:\n ");
- DEBUG(((MachineInstr *)MIB)->print(dbgs()));
- DEBUG(dbgs() << "\n");
-
- // Erase the old instructions for the block.
- I->eraseFromParent();
- Update->eraseFromParent();
-
- return NextI;
-}
-
-MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
- MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) {
+AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Update,
+ bool IsPreIdx) {
assert((Update->getOpcode() == AArch64::ADDXri ||
Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
@@ -723,20 +1269,36 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
int Value = Update->getOperand(2).getImm();
assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
- "Can't merge 1 << 12 offset into post-indexed load / store");
+ "Can't merge 1 << 12 offset into pre-/post-indexed load / store");
if (Update->getOpcode() == AArch64::SUBXri)
Value = -Value;
- unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
- MachineInstrBuilder MIB =
- BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(Update->getOperand(0))
- .addOperand(I->getOperand(0))
- .addOperand(I->getOperand(1))
- .addImm(Value);
+ unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode())
+ : getPostIndexedOpcode(I->getOpcode());
+ MachineInstrBuilder MIB;
+ if (!isPairedLdSt(I)) {
+ // Non-paired instruction.
+ MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ .addOperand(getLdStRegOp(Update))
+ .addOperand(getLdStRegOp(I))
+ .addOperand(getLdStBaseOp(I))
+ .addImm(Value);
+ } else {
+ // Paired instruction.
+ int Scale = getMemScale(I);
+ MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ .addOperand(getLdStRegOp(Update))
+ .addOperand(getLdStRegOp(I, 0))
+ .addOperand(getLdStRegOp(I, 1))
+ .addOperand(getLdStBaseOp(I))
+ .addImm(Value / Scale);
+ }
(void)MIB;
- DEBUG(dbgs() << "Creating post-indexed load/store.");
+ if (IsPreIdx)
+ DEBUG(dbgs() << "Creating pre-indexed load/store.");
+ else
+ DEBUG(dbgs() << "Creating post-indexed load/store.");
DEBUG(dbgs() << " Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
@@ -752,8 +1314,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
return NextI;
}
-static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
- int Offset) {
+bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI,
+ MachineInstr *MI,
+ unsigned BaseReg, int Offset) {
switch (MI->getOpcode()) {
default:
break;
@@ -769,44 +1332,65 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
// Watch out for 1 << 12 shifted value.
if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
break;
- // If the instruction has the base register as source and dest and the
- // immediate will fit in a signed 9-bit integer, then we have a match.
- if (MI->getOperand(0).getReg() == BaseReg &&
- MI->getOperand(1).getReg() == BaseReg &&
- MI->getOperand(2).getImm() <= 255 &&
- MI->getOperand(2).getImm() >= -256) {
- // If we have a non-zero Offset, we check that it matches the amount
- // we're adding to the register.
- if (!Offset || Offset == MI->getOperand(2).getImm())
- return true;
+
+ // The update instruction source and destination register must be the
+ // same as the load/store base register.
+ if (MI->getOperand(0).getReg() != BaseReg ||
+ MI->getOperand(1).getReg() != BaseReg)
+ break;
+
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ int UpdateOffset = MI->getOperand(2).getImm();
+ // For non-paired load/store instructions, the immediate must fit in a
+ // signed 9-bit integer.
+ if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
+ break;
+
+ // For paired load/store instructions, the immediate must be a multiple of
+ // the scaling factor. The scaled offset must also fit into a signed 7-bit
+ // integer.
+ if (IsPairedInsn) {
+ int Scale = getMemScale(MemMI);
+ if (UpdateOffset % Scale != 0)
+ break;
+
+ int ScaledOffset = UpdateOffset / Scale;
+ if (ScaledOffset > 64 || ScaledOffset < -64)
+ break;
}
+
+ // If we have a non-zero Offset, we check that it matches the amount
+ // we're adding to the register.
+ if (!Offset || Offset == MI->getOperand(2).getImm())
+ return true;
break;
}
return false;
}
MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
- MachineBasicBlock::iterator I, unsigned Limit, int Value) {
+ MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
- const MachineFunction &MF = *MemMI->getParent()->getParent();
- unsigned DestReg = MemMI->getOperand(0).getReg();
- unsigned BaseReg = MemMI->getOperand(1).getReg();
- int Offset = MemMI->getOperand(2).getImm() *
- TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+ unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI);
- // If the base register overlaps the destination register, we can't
- // merge the update.
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ // Scan forward looking for post-index opportunities. Updating instructions
+ // can't be formed if the memory instruction doesn't have the offset we're
+ // looking for.
+ if (MIUnscaledOffset != UnscaledOffset)
return E;
- // Scan forward looking for post-index opportunities.
- // Updating instructions can't be formed if the memory insn already
- // has an offset other than the value we're looking for.
- if (Offset != Value)
- return E;
+ // If the base register overlaps a destination register, we can't
+ // merge the update.
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+ unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
+ if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ return E;
+ }
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
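
To make the immediate-range rules in isMatchingUpdateInsn above concrete, here is a hedged standalone sketch (names invented for this example): non-paired forms take a signed 9-bit byte offset, while paired forms require a multiple of the access size whose scaled value passes the same -64..64 test used above.

#include <cstdio>

static bool updateOffsetFits(int UpdateOffset, bool IsPaired, int Scale) {
  if (!IsPaired)
    return UpdateOffset <= 255 && UpdateOffset >= -256; // signed 9-bit immediate
  if (UpdateOffset % Scale != 0)                        // must be a multiple of the access size
    return false;
  int Scaled = UpdateOffset / Scale;
  return Scaled <= 64 && Scaled >= -64;                 // same range check as above
}

int main() {
  printf("%d\n", updateOffsetFits(300, /*IsPaired=*/false, 8)); // 0: exceeds 255
  printf("%d\n", updateOffsetFits(48, /*IsPaired=*/true, 8));   // 1: 48 / 8 = 6
  printf("%d\n", updateOffsetFits(20, /*IsPaired=*/true, 8));   // 0: not a multiple of 8
  return 0;
}
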
@@ -825,7 +1409,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(MI, BaseReg, Value))
+ if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset))
return MBBI;
// Update the status of what the instruction clobbered and used.
@@ -845,21 +1429,22 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
- const MachineFunction &MF = *MemMI->getParent()->getParent();
- unsigned DestReg = MemMI->getOperand(0).getReg();
- unsigned BaseReg = MemMI->getOperand(1).getReg();
- int Offset = MemMI->getOperand(2).getImm();
- unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize();
+ unsigned BaseReg = getLdStBaseOp(MemMI).getReg();
+ int Offset = getLdStOffsetOp(MemMI).getImm();
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
if (MBBI == B || Offset != 0)
return E;
- // If the base register overlaps the destination register, we can't
+ // If the base register overlaps a destination register, we can't
// merge the update.
- if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
- return E;
+ bool IsPairedInsn = isPairedLdSt(MemMI);
+ for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
+ unsigned DestReg = getLdStRegOp(MemMI, i).getReg();
+ if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
+ return E;
+ }
// Track which registers have been modified and used between the first insn
// (inclusive) and the second insn.
@@ -878,7 +1463,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
++Count;
// If we found a match, return it.
- if (isMatchingUpdateInsn(MI, BaseReg, RegSize))
+ if (isMatchingUpdateInsn(I, MI, BaseReg, Offset))
return MBBI;
// Update the status of what the instruction clobbered and used.
@@ -892,17 +1477,101 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}
-bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
+bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ // If this is a volatile load, don't mess with it.
+ if (MI->hasOrderedMemoryRef())
+ return false;
+
+ // Make sure this is a reg+imm.
+ // FIXME: It is possible to extend it to handle reg+reg cases.
+ if (!getLdStOffsetOp(MI).isImm())
+ return false;
+
+ // Look backward up to ScanLimit instructions.
+ MachineBasicBlock::iterator StoreI;
+ if (findMatchingStore(MBBI, ScanLimit, StoreI)) {
+ ++NumLoadsFromStoresPromoted;
+ // Promote the load. Keeping the iterator straight is a
+ // pain, so we let the merge routine tell us what the next instruction
+ // is after it's done mucking about.
+ MBBI = promoteLoadFromStore(MBBI, StoreI);
+ return true;
+ }
+ return false;
+}
+
+bool AArch64LoadStoreOpt::tryToMergeLdStInst(
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = MBBI;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ // If this is a volatile load/store, don't mess with it.
+ if (MI->hasOrderedMemoryRef())
+ return false;
+
+ // Make sure this is a reg+imm (as opposed to an address reloc).
+ if (!getLdStOffsetOp(MI).isImm())
+ return false;
+
+ // Check if this load/store has a hint to avoid pair formation.
+ // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
+ if (TII->isLdStPairSuppressed(MI))
+ return false;
+
+ // Look ahead up to ScanLimit instructions for a pairable instruction.
+ LdStPairFlags Flags;
+ MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit);
+ if (Paired != E) {
+ if (isNarrowLoad(MI)) {
+ ++NumNarrowLoadsPromoted;
+ } else if (isNarrowStore(MI)) {
+ ++NumZeroStoresPromoted;
+ } else {
+ ++NumPairCreated;
+ if (isUnscaledLdSt(MI))
+ ++NumUnscaledPairCreated;
+ }
+
+ // Merge the loads into a pair. Keeping the iterator straight is a
+ // pain, so we let the merge routine tell us what the next instruction
+ // is after it's done mucking about.
+ MBBI = mergePairedInsns(MBBI, Paired, Flags);
+ return true;
+ }
+ return false;
+}
+
+bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
+ bool enableNarrowLdOpt) {
bool Modified = false;
- // Two tranformations to do here:
- // 1) Find loads and stores that can be merged into a single load or store
+  // Three transformations to do here:
+ // 1) Find loads that directly read from stores and promote them by
+ // replacing with mov instructions. If the store is wider than the load,
+ // the load will be replaced with a bitfield extract.
+ // e.g.,
+ // str w1, [x0, #4]
+ // ldrh w2, [x0, #6]
+ // ; becomes
+ // str w1, [x0, #4]
+ // lsr w2, w1, #16
+ // 2) Find narrow loads that can be converted into a single wider load
+ // with bitfield extract instructions.
+ // e.g.,
+ // ldrh w0, [x2]
+ // ldrh w1, [x2, #2]
+ // ; becomes
+ // ldr w0, [x2]
+ // ubfx w1, w0, #16, #16
+ // and w0, w0, #ffff
+ // 3) Find loads and stores that can be merged into a single load or store
// pair instruction.
// e.g.,
// ldr x0, [x2]
// ldr x1, [x2, #8]
// ; becomes
// ldp x0, x1, [x2]
- // 2) Find base register updates that can be merged into the load or store
+ // 4) Find base register updates that can be merged into the load or store
// as a base-reg writeback.
// e.g.,
// ldr x0, [x2]
@@ -918,6 +1587,69 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
// Just move on to the next instruction.
++MBBI;
break;
+ // Scaled instructions.
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ // Unscaled instructions.
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi: {
+ if (tryToPromoteLoadFromStore(MBBI)) {
+ Modified = true;
+ break;
+ }
+ ++MBBI;
+ break;
+ }
+ // FIXME: Do the other instructions.
+ }
+ }
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ enableNarrowLdOpt && MBBI != E;) {
+ MachineInstr *MI = MBBI;
+ switch (MI->getOpcode()) {
+ default:
+ // Just move on to the next instruction.
+ ++MBBI;
+ break;
+ // Scaled instructions.
+ case AArch64::LDRBBui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSHWui:
+ case AArch64::STRBBui:
+ case AArch64::STRHHui:
+ // Unscaled instructions.
+ case AArch64::LDURBBi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHWi:
+ case AArch64::STURBBi:
+ case AArch64::STURHHi: {
+ if (tryToMergeLdStInst(MBBI)) {
+ Modified = true;
+ break;
+ }
+ ++MBBI;
+ break;
+ }
+ // FIXME: Do the other instructions.
+ }
+ }
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ MachineInstr *MI = MBBI;
+ switch (MI->getOpcode()) {
+ default:
+ // Just move on to the next instruction.
+ ++MBBI;
+ break;
+ // Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
@@ -929,7 +1661,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDRXui:
case AArch64::LDRWui:
case AArch64::LDRSWui:
- // do the unscaled versions as well
+ // Unscaled instructions.
case AArch64::STURSi:
case AArch64::STURDi:
case AArch64::STURQi:
@@ -941,37 +1673,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDURWi:
case AArch64::LDURXi:
case AArch64::LDURSWi: {
- // If this is a volatile load/store, don't mess with it.
- if (MI->hasOrderedMemoryRef()) {
- ++MBBI;
- break;
- }
- // Make sure this is a reg+imm (as opposed to an address reloc).
- if (!MI->getOperand(2).isImm()) {
- ++MBBI;
- break;
- }
- // Check if this load/store has a hint to avoid pair formation.
- // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
- if (TII->isLdStPairSuppressed(MI)) {
- ++MBBI;
- break;
- }
- // Look ahead up to ScanLimit instructions for a pairable instruction.
- bool MergeForward = false;
- int SExtIdx = -1;
- MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit);
- if (Paired != E) {
- // Merge the loads into a pair. Keeping the iterator straight is a
- // pain, so we let the merge routine tell us what the next instruction
- // is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx);
-
+ if (tryToMergeLdStInst(MBBI)) {
Modified = true;
- ++NumPairCreated;
- if (isUnscaledLdst(MI->getOpcode()))
- ++NumUnscaledPairCreated;
break;
}
++MBBI;
@@ -992,17 +1695,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
// Just move on to the next instruction.
++MBBI;
break;
+ // Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::STRXui:
case AArch64::STRWui:
+ case AArch64::STRHHui:
+ case AArch64::STRBBui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
- // do the unscaled versions as well
+ case AArch64::LDRHHui:
+ case AArch64::LDRBBui:
+ // Unscaled instructions.
case AArch64::STURSi:
case AArch64::STURDi:
case AArch64::STURQi:
@@ -1012,25 +1720,41 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
case AArch64::LDURDi:
case AArch64::LDURQi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
+ // Paired instructions.
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi:
+ case AArch64::STPSi:
+ case AArch64::STPDi:
+ case AArch64::STPQi:
+ case AArch64::STPWi:
+ case AArch64::STPXi: {
// Make sure this is a reg+imm (as opposed to an address reloc).
- if (!MI->getOperand(2).isImm()) {
+ if (!getLdStOffsetOp(MI).isImm()) {
++MBBI;
break;
}
- // Look ahead up to ScanLimit instructions for a mergable instruction.
+ // Look forward to try to form a post-index instruction. For example,
+ // ldr x0, [x20]
+ // add x20, x20, #32
+ // merged into:
+ // ldr x0, [x20], #32
MachineBasicBlock::iterator Update =
findMatchingUpdateInsnForward(MBBI, ScanLimit, 0);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergePostIdxUpdateInsn(MBBI, Update);
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
Modified = true;
++NumPostFolded;
break;
}
// Don't know how to handle pre/post-index versions, so move to the next
// instruction.
- if (isUnscaledLdst(Opc)) {
+ if (isUnscaledLdSt(Opc)) {
++MBBI;
break;
}
@@ -1043,28 +1767,25 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergePreIdxUpdateInsn(MBBI, Update);
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
Modified = true;
++NumPreFolded;
break;
}
+ // The immediate in the load/store is scaled by the size of the memory
+ // operation. The immediate in the add we're looking for,
+ // however, is not, so adjust here.
+ int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI);
// Look forward to try to find a post-index instruction. For example,
// ldr x1, [x0, #64]
// add x0, x0, #64
// merged into:
// ldr x1, [x0, #64]!
-
- // The immediate in the load/store is scaled by the size of the register
- // being loaded. The immediate in the add we're looking for,
- // however, is not, so adjust here.
- int Value = MI->getOperand(2).getImm() *
- TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent()))
- ->getSize();
- Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value);
+ Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset);
if (Update != E) {
// Merge the update into the ld/st.
- MBBI = mergePreIdxUpdateInsn(MBBI, Update);
+ MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
Modified = true;
++NumPreFolded;
break;
@@ -1081,13 +1802,24 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
return Modified;
}
+bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
+ bool ProfitableArch = Subtarget->isCortexA57();
+ // FIXME: The benefit from converting narrow loads into a wider load could be
+ // microarchitectural as it assumes that a single load with two bitfield
+ // extracts is cheaper than two narrow loads. Currently, this conversion is
+ // enabled only in cortex-a57 on which performance benefits were verified.
+ return ProfitableArch && !Subtarget->requiresStrictAlign();
+}
+
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo());
- TRI = Fn.getSubtarget().getRegisterInfo();
+ Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
+ TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
+ TRI = Subtarget->getRegisterInfo();
bool Modified = false;
+ bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB);
+ Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
return Modified;
}
@@ -1095,8 +1827,8 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
// loads and stores near one another?
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
+/// createAArch64LoadStoreOptimizationPass - returns an instance of the
+/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
return new AArch64LoadStoreOpt();
}
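
As a standalone illustration of transformations 1 and 2 from the optimizeBlock comment above (little-endian layout assumed; the helper names are invented for this sketch and are not LLVM APIs):

#include <cstdint>
#include <cstdio>

// Transformation 1: "str w1, [x0, #4]" followed by "ldrh w2, [x0, #6]".
// The load reads bytes 2..3 of the stored word, i.e. "lsr w2, w1, #16";
// ldrh zero-extends, which the mask models.
static uint32_t promoteLoadFromStore(uint32_t StoredW1, unsigned ByteOff) {
  return (StoredW1 >> (ByteOff * 8)) & 0xffffu;
}

// Transformation 2: two adjacent ldrh merged into one ldr plus extracts,
// i.e. "and w0, w0, #0xffff" and "ubfx w1, w0, #16, #16".
static void splitWideLoad(uint32_t Wide, uint32_t &Lo16, uint32_t &Hi16) {
  Lo16 = Wide & 0xffffu;
  Hi16 = (Wide >> 16) & 0xffffu;
}

int main() {
  printf("0x%x\n", promoteLoadFromStore(0xdeadbeef, 2)); // 0xdead
  uint32_t Lo, Hi;
  splitWideLoad(0x12345678, Lo, Hi);
  printf("0x%x 0x%x\n", Lo, Hi);                         // 0x5678 0x1234
  return 0;
}
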
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index 580427a..2b4cdf1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -207,9 +207,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
- if (lowerOperand(MI->getOperand(i), MCOp))
+ if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h
deleted file mode 100644
index 4164b33..0000000
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineCombinerPattern.h
+++ /dev/null
@@ -1,42 +0,0 @@
-//===- AArch64MachineCombinerPattern.h -===//
-//===- AArch64 instruction pattern supported by combiner -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines instruction pattern supported by combiner
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H
-
-namespace llvm {
-
-/// Enumeration of instruction pattern supported by machine combiner
-///
-///
-namespace MachineCombinerPattern {
-enum MC_PATTERN : int {
- MC_NONE = 0,
- MC_MULADDW_OP1 = 1,
- MC_MULADDW_OP2 = 2,
- MC_MULSUBW_OP1 = 3,
- MC_MULSUBW_OP2 = 4,
- MC_MULADDWI_OP1 = 5,
- MC_MULSUBWI_OP1 = 6,
- MC_MULADDX_OP1 = 7,
- MC_MULADDX_OP2 = 8,
- MC_MULSUBX_OP1 = 9,
- MC_MULSUBX_OP2 = 10,
- MC_MULADDXI_OP1 = 11,
- MC_MULSUBXI_OP1 = 12
-};
-} // end namespace MachineCombinerPattern
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 536a8d0..318f839 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//=- AArch64MachineFuctionInfo.h - AArch64 machine function info --*- C++ -*-=//
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -42,7 +42,7 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
unsigned ArgumentStackToRestore;
/// HasStackFrame - True if this function has a stack frame. Set by
- /// processFunctionBeforeCalleeSavedScan().
+ /// determineCalleeSaves().
bool HasStackFrame;
/// \brief Amount of stack frame size, not including callee-saved registers.
@@ -72,16 +72,22 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
/// registers.
unsigned VarArgsFPRSize;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR;
+
public:
AArch64FunctionInfo()
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+ IsSplitCSR(false) {}
explicit AArch64FunctionInfo(MachineFunction &MF)
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
- VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
+ IsSplitCSR(false) {
(void)MF;
}
@@ -96,6 +102,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
unsigned getLocalStackSize() const { return LocalStackSize; }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index e1b93bf..79c09d9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -489,7 +489,7 @@ bool AArch64PromoteConstant::insertDefinitions(
for (const auto &IPI : InsertPts) {
// Create the load of the global variable.
- IRBuilder<> Builder(IPI.first->getParent(), IPI.first);
+ IRBuilder<> Builder(IPI.first);
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
DEBUG(dbgs() << "**********\n");
DEBUG(dbgs() << "New def: ");
@@ -540,7 +540,7 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) {
bool LocalChange = false;
SmallPtrSet<Constant *, 8> AlreadyChecked;
- for (Instruction &I : inst_range(&F)) {
+ for (Instruction &I : instructions(&F)) {
// Traverse the operand, looking for constant vectors. Replace them by a
// load of a global variable of constant vector type.
for (Value *Op : I.operand_values()) {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 841af55..32b4888 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -15,6 +15,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
@@ -34,10 +35,6 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
-static cl::opt<bool>
-ReserveX18("aarch64-reserve-x18", cl::Hidden,
- cl::desc("Reserve X18, making it unavailable as GPR"));
-
AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
: AArch64GenRegisterInfo(AArch64::LR), TT(TT) {}
@@ -50,10 +47,23 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_NoRegs_SaveList;
if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_SaveList;
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS)
+ return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
+ CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
+ CSR_AArch64_CXX_TLS_Darwin_SaveList;
else
return CSR_AArch64_AAPCS_SaveList;
}
+const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<AArch64FunctionInfo>()->isSplitCSR())
+ return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList;
+ return nullptr;
+}
+
const uint32_t *
AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -62,6 +72,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_AArch64_NoRegs_RegMask;
if (CC == CallingConv::AnyReg)
return CSR_AArch64_AllRegs_RegMask;
+ if (CC == CallingConv::CXX_FAST_TLS)
+ return CSR_AArch64_CXX_TLS_Darwin_RegMask;
else
return CSR_AArch64_AAPCS_RegMask;
}
@@ -104,7 +116,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AArch64::W29);
}
- if (TT.isOSDarwin() || ReserveX18) {
+ if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) {
Reserved.set(AArch64::X18); // Platform register
Reserved.set(AArch64::W18);
}
@@ -131,7 +143,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return true;
case AArch64::X18:
case AArch64::W18:
- return TT.isOSDarwin() || ReserveX18;
+ return MF.getSubtarget<AArch64Subtarget>().isX18Reserved();
case AArch64::FP:
case AArch64::W29:
return TFI->hasFP(MF) || TT.isOSDarwin();
@@ -186,29 +198,6 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
-bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
-
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
- return false;
-
- return true;
-}
-
-// FIXME: share this with other backends with identical implementation?
-bool
-AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const AArch64FrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
-}
-
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const AArch64FrameLowering *TFI = getFrameLowering(MF);
@@ -424,10 +413,11 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case AArch64::GPR64RegClassID:
case AArch64::GPR32commonRegClassID:
case AArch64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
- - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register
- - hasBasePointer(MF); // X19
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP
+ - MF.getSubtarget<AArch64Subtarget>()
+ .isX18Reserved() // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
case AArch64::FPR8RegClassID:
case AArch64::FPR16RegClassID:
case AArch64::FPR32RegClassID:
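
The GPR pressure limit computed in the hunk above, written out as plain arithmetic (an illustrative sketch, not the LLVM code): 32 registers minus XZR/SP, minus FP when a frame pointer is kept, minus X18 when it is reserved as the platform register, minus X19 when used as the base pointer.

#include <cstdio>

static unsigned gprPressureLimit(bool HasFP, bool X18Reserved, bool HasBasePtr) {
  return 32u - 1u                       // XZR/SP
         - (HasFP ? 1u : 0u)            // FP
         - (X18Reserved ? 1u : 0u)      // X18 platform register
         - (HasBasePtr ? 1u : 0u);      // X19
}

int main() {
  printf("%u\n", gprPressureLimit(true, true, false)); // 29 on a Darwin-like configuration
  return 0;
}
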
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 8c379d9..f33f788 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -35,6 +35,8 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
@@ -93,9 +95,6 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- // Base pointer (stack realignment) support.
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index b2efca0..a8c8b17 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64",
// The lower 16 vector registers. Some instructions can only take registers
// in this range.
def FPR128_lo : RegisterClass<"AArch64",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16],
128, (trunc FPR128, 16)>;
// Pairs, triples, and quads of 64-bit vector registers.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 486efd6..f6ee8cf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -31,6 +31,11 @@ static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
"converter pass"), cl::init(true), cl::Hidden);
+// If OS supports TBI, use this flag to enable it.
+static cl::opt<bool>
+UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
+ "an address is ignored"), cl::init(false), cl::Hidden);
+
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
// Determine default and user-specified characteristics
@@ -46,9 +51,11 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false),
- HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
- IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
+ HasV8_1aOps(false), HasV8_2aOps(false), HasFPARMv8(false), HasNEON(false),
+ HasCrypto(false), HasCRC(false), HasPerfMon(false), HasFullFP16(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ StrictAlign(false), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian),
+ CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
TLInfo(TM, *this) {}
@@ -113,12 +120,30 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
// bi-directional scheduling. 253.perlbmk.
Policy.OnlyTopDown = false;
Policy.OnlyBottomUp = false;
+ // Enabling or Disabling the latency heuristic is a close call: It seems to
+  // help nearly no benchmark on out-of-order architectures; on the other hand,
+  // it regresses register pressure on a few benchmarks.
+ if (isCyclone())
+ Policy.DisableLatencyHeuristic = true;
}
bool AArch64Subtarget::enableEarlyIfConversion() const {
return EnableEarlyIfConvert;
}
+bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
+ if (!UseAddressTopByteIgnored)
+ return false;
+
+ if (TargetTriple.isiOS()) {
+ unsigned Major, Minor, Micro;
+ TargetTriple.getiOSVersion(Major, Minor, Micro);
+ return Major >= 8;
+ }
+
+ return false;
+}
+
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
if (!isCortexA57())
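
A sketch of the gating logic in supportsAddressTopByteIgnored above (illustrative only; TripleInfo is invented for this example): TBI is honored only when the flag is requested and the target is iOS 8 or newer.

#include <cstdio>

struct TripleInfo {
  bool IsiOS;
  unsigned Major;
};

static bool supportsTopByteIgnored(bool UseTBIFlag, const TripleInfo &TT) {
  if (!UseTBIFlag)
    return false;                 // flag not requested
  return TT.IsiOS && TT.Major >= 8;
}

int main() {
  TripleInfo IOS9 = {true, 9};
  printf("%d\n", supportsTopByteIgnored(true, IOS9));  // 1
  printf("%d\n", supportsTopByteIgnored(false, IOS9)); // 0: flag not set
  return 0;
}
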
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 6bb0694..1b8b9b2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -33,17 +33,21 @@ class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
protected:
- enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone};
+ enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone};
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
bool HasV8_1aOps;
+ bool HasV8_2aOps;
bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
bool HasCRC;
+ bool HasPerfMon;
+ bool HasFullFP16;
+ bool HasSPE;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
@@ -51,6 +55,12 @@ protected:
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing;
+ // StrictAlign - Disallow unaligned memory accesses.
+ bool StrictAlign;
+
+ // ReserveX18 - X18 is not available as a general purpose register.
+ bool ReserveX18;
+
bool IsLittle;
/// CPUString - String name of used CPU.
@@ -92,19 +102,30 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override {
- return isCortexA53() || isCortexA57();
+ return isGeneric() || isCortexA53() || isCortexA57();
}
bool hasV8_1aOps() const { return HasV8_1aOps; }
+ bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool requiresStrictAlign() const { return StrictAlign; }
+
+ bool isX18Reserved() const { return ReserveX18; }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ /// CPU has TBI (top byte of addresses is ignored during HW address
+ /// translation) and OS enables it.
+ bool supportsAddressTopByteIgnored() const;
+
+ bool hasPerfMon() const { return HasPerfMon; }
+ bool hasFullFP16() const { return HasFullFP16; }
+ bool hasSPE() const { return HasSPE; }
bool isLittleEndian() const { return IsLittle; }
@@ -112,11 +133,13 @@ public:
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+ bool isGeneric() const { return CPUString == "generic"; }
bool isCyclone() const { return CPUString == "cyclone"; }
bool isCortexA57() const { return CPUString == "cortex-a57"; }
bool isCortexA53() const { return CPUString == "cortex-a53"; }
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index db6e244..c52c554 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -203,7 +203,7 @@ public:
} // namespace
TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(AArch64TTIImpl(this, F));
});
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e085cca..9af0e64 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
+int AArch64TTIImpl::getIntImmCost(int64_t Val) {
// Check if the immediate can be encoded within an instruction.
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
return 0;
@@ -37,7 +37,7 @@ unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) {
}
/// \brief Calculate the cost of materializing the given constant.
-unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -51,18 +51,18 @@ unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
- unsigned Cost = 0;
+ int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
int64_t Val = Tmp.getSExtValue();
Cost += getIntImmCost(Val);
}
  // We need at least one instruction to materialize the constant.
- return std::max(1U, Cost);
+ return std::max(1, Cost);
}
-unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -118,17 +118,17 @@ unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
}
if (Idx == ImmIdx) {
- unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ int NumConstants = (BitSize + 63) / 64;
+ int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
- ? static_cast<unsigned>(TTI::TCC_Free)
+ ? static_cast<int>(TTI::TCC_Free)
: Cost;
}
return AArch64TTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -147,10 +147,10 @@ unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
if (Idx == 1) {
- unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
+ int NumConstants = (BitSize + 63) / 64;
+ int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
- ? static_cast<unsigned>(TTI::TCC_Free)
+ ? static_cast<int>(TTI::TCC_Free)
: Cost;
}
break;
@@ -176,8 +176,7 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) {
+int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -187,7 +186,31 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (!SrcTy.isSimple() || !DstTy.isSimple())
return BaseT::getCastInstrCost(Opcode, Dst, Src);
- static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
+ static const TypeConversionCostTblEntry
+ ConversionTbl[] = {
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+
+ // The number of shll instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
@@ -210,6 +233,16 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ // Complex: to v8f32
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+
+ // Complex: to v16f32
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
+
// Complex: to v2f64
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
@@ -250,22 +283,21 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
};
- int Idx = ConvertCostTableLookup<MVT>(
- ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
- if (Idx != -1)
- return ConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) {
+int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -281,15 +313,15 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
}
// All other insert/extracts cost this much.
- return 2;
+ return 3;
}
-unsigned AArch64TTIImpl::getArithmeticInstrCost(
+int AArch64TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -300,10 +332,9 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence ADD + CMP + SELECT + SRA.
  // The OperandValue properties may not be the same as those of the previous
  // operation; conservatively assume OP_None.
- unsigned Cost =
- getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
+ int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -331,7 +362,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost(
}
}
-unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -346,19 +377,20 @@ unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}
-unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
- // We don't lower vector selects well that are wider than the register width.
+ // We don't lower some vector selects well that are wider than the register
+ // width.
if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
// We would need this many instructions to hide the scalarization happening.
- const unsigned AmortizationCost = 20;
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ const int AmortizationCost = 20;
+ static const TypeConversionCostTblEntry
VectorSelectTbl[] = {
- { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
- { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
- { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
@@ -367,20 +399,18 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx =
- ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return VectorSelectTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT()))
+ return Entry->Cost;
}
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isIntegerTy(64)) {
@@ -389,7 +419,7 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// practice on inlined memcpy code.
// We make v2i64 stores expensive so that we will only vectorize if there
// are 6 other instructions getting vectorized.
- unsigned AmortizationCost = 6;
+ int AmortizationCost = 6;
return LT.first * 2 * AmortizationCost;
}
@@ -407,16 +437,18 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
- unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- unsigned Alignment, unsigned AddressSpace) {
+int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
// ldN/stN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
@@ -427,8 +459,8 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost(
Alignment, AddressSpace);
}
-unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
- unsigned Cost = 0;
+int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
+ int Cost = 0;
for (auto *I : Tys) {
if (!I->isVectorTy())
continue;
@@ -506,7 +538,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_ld4:
Info.ReadMem = true;
Info.WriteMem = false;
- Info.Vol = false;
+ Info.IsSimple = true;
Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(0);
break;
@@ -515,7 +547,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_st4:
Info.ReadMem = false;
Info.WriteMem = true;
- Info.Vol = false;
+ Info.IsSimple = true;
Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
break;
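
The lookup style the cost-table hunks above migrate to, reduced to a standalone sketch (not the LLVM CostTable API; the entry type and function name are invented here): return a pointer to the matching entry, or nullptr so the caller can fall back to the base-class cost.

#include <cstdio>

struct ConvEntry {
  int ISD;
  int Dst;
  int Src;
  int Cost;
};

static const ConvEntry *lookupConversion(const ConvEntry *Tbl, unsigned N,
                                         int ISD, int Dst, int Src) {
  for (unsigned I = 0; I != N; ++I)
    if (Tbl[I].ISD == ISD && Tbl[I].Dst == Dst && Tbl[I].Src == Src)
      return &Tbl[I];
  return nullptr; // caller falls back to BaseT
}

int main() {
  static const ConvEntry Tbl[] = {{1, 10, 20, 3}, {2, 11, 21, 6}};
  if (const ConvEntry *Entry = lookupConversion(Tbl, 2, 2, 11, 21))
    printf("cost=%d\n", Entry->Cost); // cost=6
  return 0;
}
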
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 444d3cc..ec58c4f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -48,7 +48,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
};
public:
- explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F)
+ explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -63,12 +63,11 @@ public:
/// @{
using BaseT::getIntImmCost;
- unsigned getIntImmCost(int64_t Val);
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCost(int64_t Val);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
/// @}
@@ -76,6 +75,8 @@ public:
/// \name Vector TTI Implementations
/// @{
+ bool enableInterleavedAccessVectorization() { return true; }
+
unsigned getNumberOfRegisters(bool Vector) {
if (Vector) {
if (ST->hasNEON())
@@ -96,25 +97,25 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex);
+ int getAddressComputationCost(Type *Ty, bool IsComplex);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
- unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
+ int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -123,11 +124,9 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 38e8b4d..394c8e7 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -43,7 +43,6 @@ class AArch64Operand;
class AArch64AsmParser : public MCTargetAsmParser {
private:
StringRef Mnemonic; ///< Instruction mnemonic.
- MCSubtargetInfo &STI;
// Map of register aliases registers via the .req directive.
StringMap<std::pair<bool, unsigned> > RegisterReqs;
@@ -101,6 +100,7 @@ private:
OperandMatchResultTy tryParseSysReg(OperandVector &Operands);
OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
+ OperandMatchResultTy tryParsePSBHint(OperandVector &Operands);
OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
@@ -115,16 +115,16 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
- AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI) {
+ : MCTargetAsmParser(Options, STI) {
MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
new AArch64TargetStreamer(S);
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -160,7 +160,8 @@ private:
k_Prefetch,
k_ShiftExtend,
k_FPImm,
- k_Barrier
+ k_Barrier,
+ k_PSBHint,
} Kind;
SMLoc StartLoc, EndLoc;
@@ -228,6 +229,12 @@ private:
unsigned Length;
};
+ struct PSBHintOp {
+ unsigned Val;
+ const char *Data;
+ unsigned Length;
+ };
+
struct ShiftExtendOp {
AArch64_AM::ShiftExtendType Type;
unsigned Amount;
@@ -251,6 +258,7 @@ private:
struct SysRegOp SysReg;
struct SysCRImmOp SysCRImm;
struct PrefetchOp Prefetch;
+ struct PSBHintOp PSBHint;
struct ShiftExtendOp ShiftExtend;
};
@@ -302,6 +310,9 @@ public:
case k_Prefetch:
Prefetch = o.Prefetch;
break;
+ case k_PSBHint:
+ PSBHint = o.PSBHint;
+ break;
case k_ShiftExtend:
ShiftExtend = o.ShiftExtend;
break;
@@ -393,6 +404,16 @@ public:
return Prefetch.Val;
}
+ unsigned getPSBHint() const {
+ assert(Kind == k_PSBHint && "Invalid access!");
+ return PSBHint.Val;
+ }
+
+ StringRef getPSBHintName() const {
+ assert(Kind == k_PSBHint && "Invalid access!");
+ return StringRef(PSBHint.Data, PSBHint.Length);
+ }
+
StringRef getPrefetchName() const {
assert(Kind == k_Prefetch && "Invalid access!");
return StringRef(Prefetch.Data, Prefetch.Length);
@@ -497,6 +518,15 @@ public:
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
}
+ bool isImm0_1() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 2);
+ }
bool isImm0_7() const {
if (!isImm())
return false;
@@ -876,12 +906,15 @@ public:
}
bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
-
return SysReg.MSRReg != -1U;
}
- bool isSystemPStateField() const {
+ bool isSystemPStateFieldWithImm0_1() const {
if (!isSysReg()) return false;
-
+ return (SysReg.PStateField == AArch64PState::PAN ||
+ SysReg.PStateField == AArch64PState::UAO);
+ }
+ bool isSystemPStateFieldWithImm0_15() const {
+ if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false;
return SysReg.PStateField != -1U;
}
bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
@@ -950,6 +983,7 @@ public:
}
bool isSysCR() const { return Kind == k_SysCR; }
bool isPrefetch() const { return Kind == k_Prefetch; }
+ bool isPSBHint() const { return Kind == k_PSBHint; }
bool isShiftExtend() const { return Kind == k_ShiftExtend; }
bool isShifter() const {
if (!isShiftExtend())
@@ -1175,8 +1209,10 @@ public:
template <unsigned NumRegs>
void addVectorList64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1,
- AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 };
+ static const unsigned FirstRegs[] = { AArch64::D0,
+ AArch64::D0_D1,
+ AArch64::D0_D1_D2,
+ AArch64::D0_D1_D2_D3 };
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
@@ -1186,8 +1222,10 @@ public:
template <unsigned NumRegs>
void addVectorList128Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1,
- AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 };
+ static const unsigned FirstRegs[] = { AArch64::Q0,
+ AArch64::Q0_Q1,
+ AArch64::Q0_Q1_Q2,
+ AArch64::Q0_Q1_Q2_Q3 };
unsigned FirstReg = FirstRegs[NumRegs - 1];
Inst.addOperand(
@@ -1304,6 +1342,12 @@ public:
Inst.addOperand(MCOperand::createImm(MCE->getValue() / 16));
}
+ void addImm0_1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(MCE->getValue()));
+ }
+
void addImm0_7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
@@ -1491,7 +1535,13 @@ public:
Inst.addOperand(MCOperand::createImm(SysReg.MSRReg));
}
- void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const {
+ void addSystemPStateFieldWithImm0_1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createImm(SysReg.PStateField));
+ }
+
+ void addSystemPStateFieldWithImm0_15Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(SysReg.PStateField));
@@ -1507,6 +1557,11 @@ public:
Inst.addOperand(MCOperand::createImm(getPrefetch()));
}
+ void addPSBHintOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(getPSBHint()));
+ }
+
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
unsigned Imm =
@@ -1703,6 +1758,19 @@ public:
return Op;
}
+ static std::unique_ptr<AArch64Operand> CreatePSBHint(unsigned Val,
+ StringRef Str,
+ SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_PSBHint, Ctx);
+ Op->PSBHint.Val = Val;
+ Op->PSBHint.Data = Str.data();
+ Op->PSBHint.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<AArch64Operand>
CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
@@ -1776,6 +1844,10 @@ void AArch64Operand::print(raw_ostream &OS) const {
OS << "<prfop invalid #" << getPrefetch() << ">";
break;
}
+ case k_PSBHint: {
+ OS << getPSBHintName();
+ break;
+ }
case k_ShiftExtend: {
OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
<< getShiftExtendAmount();
@@ -1849,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) {
.Case(".h", true)
.Case(".s", true)
.Case(".d", true)
+ // Needed for fp16 scalar pairwise reductions
+ .Case(".2h", true)
.Default(false);
}
@@ -2016,7 +2090,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
StringRef Name =
- Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid);
Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
S, getContext()));
return MatchOperand_Success;
@@ -2030,7 +2104,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
unsigned prfop =
- Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
+ Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
@@ -2042,6 +2116,32 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
return MatchOperand_Success;
}
+/// tryParsePSBHint - Try to parse a PSB operand, mapped to Hint command
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ SMLoc S = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ bool Valid;
+ auto Mapper = AArch64PSBHint::PSBHintMapper();
+ unsigned psbhint =
+ Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid);
+ if (!Valid) {
+ TokError("invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(AArch64Operand::CreatePSBHint(psbhint, Tok.getString(),
+ S, getContext()));
+ return MatchOperand_Success;
+}
+
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
AArch64AsmParser::OperandMatchResultTy
@@ -2439,6 +2539,13 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
} else if (!Op.compare_lower("cisw")) {
// SYS #0, C7, C14, #2
SYS_ALIAS(0, 7, 14, 2);
+ } else if (!Op.compare_lower("cvap")) {
+ if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
+ // SYS #3, C7, C12, #1
+ SYS_ALIAS(3, 7, 12, 1);
+ } else {
+ return TokError("DC CVAP requires ARMv8.2a");
+ }
} else {
return TokError("invalid operand for DC instruction");
}
@@ -2479,6 +2586,20 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
} else if (!Op.compare_lower("s12e0w")) {
// SYS #4, C7, C8, #7
SYS_ALIAS(4, 7, 8, 7);
+ } else if (!Op.compare_lower("s1e1rp")) {
+ if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
+ // SYS #0, C7, C9, #0
+ SYS_ALIAS(0, 7, 9, 0);
+ } else {
+ return TokError("AT S1E1RP requires ARMv8.2a");
+ }
+ } else if (!Op.compare_lower("s1e1wp")) {
+ if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
+ // SYS #0, C7, C9, #1
+ SYS_ALIAS(0, 7, 9, 1);
+ } else {
+ return TokError("AT S1E1WP requires ARMv8.2a");
+ }
} else {
return TokError("invalid operand for AT instruction");
}
@@ -2644,7 +2765,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
StringRef Name =
- Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
+ Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid);
Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name,
ExprLoc, getContext()));
return MatchOperand_Success;
@@ -2658,7 +2779,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
unsigned Opt =
- Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
+ Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2687,20 +2808,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
bool IsKnown;
auto MRSMapper = AArch64SysReg::MRSMapper();
- uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(),
- IsKnown);
+ uint32_t MRSReg = MRSMapper.fromString(Tok.getString(),
+ getSTI().getFeatureBits(), IsKnown);
assert(IsKnown == (MRSReg != -1U) &&
"register should be -1 if and only if it's unknown");
auto MSRMapper = AArch64SysReg::MSRMapper();
- uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(),
- IsKnown);
+ uint32_t MSRReg = MSRMapper.fromString(Tok.getString(),
+ getSTI().getFeatureBits(), IsKnown);
assert(IsKnown == (MSRReg != -1U) &&
"register should be -1 if and only if it's unknown");
auto PStateMapper = AArch64PState::PStateMapper();
uint32_t PStateField =
- PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown);
+ PStateMapper.fromString(Tok.getString(),
+ getSTI().getFeatureBits(), IsKnown);
assert(IsKnown == (PStateField != -1U) &&
"register should be -1 if and only if it's unknown");
@@ -3151,7 +3273,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (Operands.size() < 2 ||
!static_cast<AArch64Operand &>(*Operands[1]).isReg())
- return true;
+ return Error(Loc, "Only valid when first operand is register");
bool IsXReg =
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
@@ -3183,7 +3305,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
}
// If it is a label or an imm that cannot fit in a movz, put it into CP.
const MCExpr *CPLoc =
- getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4);
+ getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4, Loc);
Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx));
return false;
}
@@ -3601,6 +3723,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
return Error(Loc, "index must be a multiple of 8 in range [0, 32760].");
case Match_InvalidMemoryIndexed16:
return Error(Loc, "index must be a multiple of 16 in range [0, 65520].");
+ case Match_InvalidImm0_1:
+ return Error(Loc, "immediate must be an integer in range [0, 1].");
case Match_InvalidImm0_7:
return Error(Loc, "immediate must be an integer in range [0, 7].");
case Match_InvalidImm0_15:
@@ -3912,7 +4036,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
unsigned zreg =
- AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains(
+ !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains(
RegOp.getReg())
? AArch64::WZR
: AArch64::XZR;
@@ -3929,10 +4053,27 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// If that fails, try against the alternate table containing long-form NEON:
// "fadd v0.2s, v1.2s, v2.2s"
- if (MatchResult != Match_Success)
+ if (MatchResult != Match_Success) {
+ // But first, save the short-form match result: we can use it in case the
+ // long-form match also fails.
+ auto ShortFormNEONErrorInfo = ErrorInfo;
+ auto ShortFormNEONMatchResult = MatchResult;
+
MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0);
+ // Now, both matches failed, and the long-form match failed on the mnemonic
+ // suffix token operand. The short-form match failure is probably more
+ // relevant: use it instead.
+ if (MatchResult == Match_InvalidOperand && ErrorInfo == 1 &&
+ Operands.size() > 1 && ((AArch64Operand &)*Operands[1]).isToken() &&
+ ((AArch64Operand &)*Operands[1]).isTokenSuffix()) {
+ MatchResult = ShortFormNEONMatchResult;
+ ErrorInfo = ShortFormNEONErrorInfo;
+ }
+ }
+
+
switch (MatchResult) {
case Match_Success: {
// Perform range checking and other semantic validations
@@ -3944,7 +4085,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
}
case Match_MissingFeature: {
@@ -3966,6 +4107,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return showMatchError(IDLoc, MatchResult);
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
+
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
@@ -4011,6 +4153,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexed8SImm7:
case Match_InvalidMemoryIndexed16SImm7:
case Match_InvalidMemoryIndexedSImm9:
+ case Match_InvalidImm0_1:
case Match_InvalidImm0_7:
case Match_InvalidImm0_15:
case Match_InvalidImm0_31:
@@ -4083,7 +4226,7 @@ bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getParser().parseExpression(Value))
return true;
- getParser().getStreamer().EmitValue(Value, Size);
+ getParser().getStreamer().EmitValue(Value, Size, L);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -4155,7 +4298,7 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
Inst.setOpcode(AArch64::TLSDESCCALL);
Inst.addOperand(MCOperand::createExpr(Expr));
- getParser().getStreamer().EmitInstruction(Inst, STI);
+ getParser().getStreamer().EmitInstruction(Inst, getSTI());
return false;
}
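The MatchAndEmitInstruction change above retries the long-form NEON match table and then restores the short-form diagnostic when the retry only trips over the mnemonic-suffix token. A standalone sketch of that fallback pattern, using hypothetical stub matchers in plain C++ rather than the generated match tables:

#include <cstdio>
#include <string>

struct MatchResult {
  bool Success;
  unsigned ErrorOperand;   // index of the operand the matcher rejected
  std::string Error;
};

// Stub matchers standing in for the two generated match tables.
static MatchResult matchShortForm(const std::string &) {
  return {false, 2, "invalid operand for instruction"};
}
static MatchResult matchLongForm(const std::string &) {
  return {false, 1, "unrecognized mnemonic suffix"};
}

static MatchResult matchInstruction(const std::string &Inst) {
  MatchResult ShortForm = matchShortForm(Inst);
  if (ShortForm.Success)
    return ShortForm;

  MatchResult LongForm = matchLongForm(Inst);
  // When the long-form table only complains about the suffix token
  // (operand 1), the short-form failure is the more useful diagnostic.
  if (!LongForm.Success && LongForm.ErrorOperand == 1)
    return ShortForm;
  return LongForm;
}

int main() {
  MatchResult R = matchInstruction("fadd s0, s1, s2");
  std::printf("error on operand %u: %s\n", R.ErrorOperand, R.Error.c_str());
  return 0;
}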
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index db9fb0e..f1f968e 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1516,6 +1516,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
uint64_t pstate_field = (op1 << 3) | op2;
+ if ((pstate_field == AArch64PState::PAN ||
+ pstate_field == AArch64PState::UAO) && crm > 1)
+ return Fail;
+
Inst.addOperand(MCOperand::createImm(pstate_field));
Inst.addOperand(MCOperand::createImm(crm));
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 7f56c2c..d8a8108 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -55,7 +56,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
unsigned Opcode = MI->getOpcode();
if (Opcode == AArch64::SYSxt)
- if (printSysAlias(MI, O)) {
+ if (printSysAlias(MI, STI, O)) {
printAnnotation(O, Annot);
return;
}
@@ -269,7 +270,7 @@ struct LdStNInstrDesc {
int NaturalOffset;
};
-static LdStNInstrDesc LdStNInstInfo[] = {
+static const LdStNInstrDesc LdStNInstInfo[] = {
{ AArch64::LD1i8, "ld1", ".b", 1, true, 0 },
{ AArch64::LD1i16, "ld1", ".h", 1, true, 0 },
{ AArch64::LD1i32, "ld1", ".s", 1, true, 0 },
@@ -612,7 +613,7 @@ static LdStNInstrDesc LdStNInstInfo[] = {
{ AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 },
};
-static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
+static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
unsigned Idx;
for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
if (LdStNInstInfo[Idx].Opcode == Opcode)
@@ -641,7 +642,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
+ if (const LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t';
// Now onto the operands: first a vector list with possible lane
@@ -674,7 +675,9 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
AArch64InstPrinter::printInst(MI, O, Annot, STI);
}
-bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
+bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
#ifndef NDEBUG
unsigned Opcode = MI->getOpcode();
assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
@@ -729,6 +732,11 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
if (Op1Val == 3 && Op2Val == 1)
Asm = "dc\tcvau";
break;
+ case 12:
+ if (Op1Val == 3 && Op2Val == 1 &&
+ (STI.getFeatureBits()[AArch64::HasV8_2aOps]))
+ Asm = "dc\tcvap";
+ break;
case 14:
if (Op1Val == 3 && Op2Val == 1)
Asm = "dc\tcivac";
@@ -773,6 +781,21 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
break;
}
break;
+ case 9:
+ switch (Op1Val) {
+ default:
+ break;
+ case 0:
+ if (STI.getFeatureBits()[AArch64::HasV8_2aOps]) {
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "at\ts1e1rp"; break;
+ case 1: Asm = "at\ts1e1wp"; break;
+ }
+ }
+ break;
+ }
}
} else if (CnVal == 8) {
// TLBI aliases
@@ -1122,6 +1145,19 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
O << '#' << prfop;
}
+void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned psbhintop = MI->getOperand(OpNum).getImm();
+ bool Valid;
+ StringRef Name =
+ AArch64PSBHint::PSBHintMapper().toString(psbhintop, STI.getFeatureBits(), Valid);
+ if (Valid)
+ O << Name;
+ else
+ O << '#' << psbhintop;
+}
+
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 15dee97..ea68d98 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -15,14 +15,10 @@
#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class AArch64InstPrinter : public MCInstPrinter {
public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
@@ -48,7 +44,8 @@ public:
unsigned AltIdx = AArch64::NoRegAltName);
protected:
- bool printSysAlias(const MCInst *MI, raw_ostream &O);
+ bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
// Operand printers
void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
@@ -122,6 +119,9 @@ protected:
void printPrefetchOp(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPSBHintOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printFPImmOperand(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index ed24343..648b1df 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -364,6 +364,32 @@ static inline float getFPImmFloat(unsigned Imm) {
return FPUnion.F;
}
+/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit
+/// floating-point value. If the value cannot be represented as an 8-bit
+/// floating-point value, then return -1.
+static inline int getFP16Imm(const APInt &Imm) {
+ uint32_t Sign = Imm.lshr(15).getZExtValue() & 1;
+ int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15
+ int32_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits
+
+ // We can handle 4 bits of mantissa.
+ // mantissa = (16+UInt(e:f:g:h))/16.
+ if (Mantissa & 0x3f)
+ return -1;
+ Mantissa >>= 6;
+
+ // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+ if (Exp < -3 || Exp > 4)
+ return -1;
+ Exp = ((Exp+3) & 0x7) ^ 4;
+
+ return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+static inline int getFP16Imm(const APFloat &FPImm) {
+ return getFP16Imm(FPImm.bitcastToAPInt());
+}
+
/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
/// floating-point value. If the value cannot be represented as an 8-bit
/// floating-point value, then return -1.
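The new getFP16Imm mirrors the existing FP32/FP64 helpers: only a sign bit, three exponent bits and four mantissa bits survive into the 8-bit FMOV immediate. The same check, restated as a standalone function over a raw IEEE half-precision bit pattern instead of an APInt:

#include <cstdint>
#include <cstdio>

static int getFP16Imm(uint16_t Bits) {
  uint32_t Sign = (Bits >> 15) & 1;
  int32_t Exp = ((Bits >> 10) & 0x1f) - 15;   // unbiased exponent
  int32_t Mantissa = Bits & 0x3ff;            // 10 mantissa bits

  // Only the top 4 mantissa bits may be set: mantissa = (16+e:f:g:h)/16.
  if (Mantissa & 0x3f)
    return -1;
  Mantissa >>= 6;

  // Only 3 exponent bits are encodable: exp == UInt(NOT(b):c:d) - 3.
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp + 3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

int main() {
  // 1.0 in IEEE half is 0x3c00; it encodes as the 8-bit immediate 0x70.
  std::printf("imm8(1.0h)   = %#x\n", getFP16Imm(0x3c00));
  // 0x3c01 has low mantissa bits set and is not representable: -1.
  std::printf("imm8(0x3c01) = %d\n", getFP16Imm(0x3c01));
  return 0;
}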
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 16d5356..d26604f 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -128,10 +128,9 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size);
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
private:
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 921c4b9..fbce26e 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -48,10 +48,6 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
UseDataRegionDirectives = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
-
- // AArch64 Darwin doesn't have the baggage of X86/ARM, so it's fine to use
- // LShr instead of AShr.
- UseLogicalShr = true;
}
const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 2870341..a540f49 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -85,13 +85,13 @@ void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
Streamer.visitUsedExpr(*getSubExpr());
}
-MCSection *AArch64MCExpr::findAssociatedSection() const {
+MCFragment *AArch64MCExpr::findAssociatedFragment() const {
llvm_unreachable("FIXME: what goes here?");
}
bool AArch64MCExpr::evaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout,
- const MCFixup *Fixup) const {
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
return false;
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index 1165314..db36a65 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -149,11 +149,10 @@ public:
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override;
+ MCFragment *findAssociatedFragment() const override;
- bool evaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout,
- const MCFixup *Fixup) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
@@ -162,7 +161,6 @@ public:
}
static bool classof(const AArch64MCExpr *) { return true; }
-
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 741b273..61c96f1 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -90,9 +90,11 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
Log2Size = llvm::Log2_32(4);
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
- default:
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "ADR/ADRP relocations must be GOT relative");
+ default: {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "ADR/ADRP relocations must be GOT relative");
+ return false;
+ }
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
return true;
@@ -170,25 +172,25 @@ void AArch64MachObjectWriter::recordRelocation(
// assembler local symbols. If we got here, that's not what we have,
// so complain loudly.
if (Kind == AArch64::fixup_aarch64_pcrel_branch19) {
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "conditional branch requires assembler-local"
- " label. '" +
- Target.getSymA()->getSymbol().getName() +
- "' is external.");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "conditional branch requires assembler-local"
+ " label. '" +
+ Target.getSymA()->getSymbol().getName() +
+ "' is external.");
return;
}
// 14-bit branch relocations should only target internal labels, and so
// should never get here.
if (Kind == AArch64::fixup_aarch64_pcrel_branch14) {
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "Invalid relocation on conditional branch!");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "Invalid relocation on conditional branch!");
return;
}
if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
- Asm)) {
- Asm.getContext().reportFatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
+ Asm)) {
+ Asm.getContext().reportError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
return;
}
@@ -200,8 +202,9 @@ void AArch64MachObjectWriter::recordRelocation(
Type = MachO::ARM64_RELOC_UNSIGNED;
if (IsPCRel) {
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "PC relative absolute relocation!");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "PC relative absolute relocation!");
+ return;
// FIXME: x86_64 sets the type to a branch reloc here. Should we do
// something similar?
@@ -229,16 +232,20 @@ void AArch64MachObjectWriter::recordRelocation(
Writer->addRelocation(A_Base, Fragment->getParent(), MRE);
return;
} else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
- Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) {
// Otherwise, neither symbol can be modified.
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported relocation of modified symbol");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation of modified symbol");
+ return;
+ }
// We don't support PCrel relocations of differences.
- if (IsPCRel)
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported pc-relative relocation of "
- "difference");
+ if (IsPCRel) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported pc-relative relocation of "
+ "difference");
+ return;
+ }
// AArch64 always uses external relocations. If there is no symbol to use as
// a base address (a local symbol with no preceding non-local symbol),
@@ -246,20 +253,26 @@ void AArch64MachObjectWriter::recordRelocation(
//
// FIXME: We should probably just synthesize an external symbol and use
// that.
- if (!A_Base)
- Asm.getContext().reportFatalError(
+ if (!A_Base) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + A->getName() +
"'. Must have non-local symbol earlier in section.");
- if (!B_Base)
- Asm.getContext().reportFatalError(
+ return;
+ }
+ if (!B_Base) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + B->getName() +
"'. Must have non-local symbol earlier in section.");
+ return;
+ }
- if (A_Base == B_Base && A_Base)
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported relocation with identical base");
+ if (A_Base == B_Base && A_Base) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported relocation with identical base");
+ return;
+ }
Value += (!A->getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) -
(!A_Base || !A_Base->getFragment() ? 0 : Writer->getSymbolAddress(
@@ -309,10 +322,12 @@ void AArch64MachObjectWriter::recordRelocation(
// we need to preserve and merge with the new Target? How about
// the FixedValue?
if (!Symbol->getVariableValue()->evaluateAsRelocatable(Target, &Layout,
- &Fixup))
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unable to resolve variable '" +
- Symbol->getName() + "'");
+ &Fixup)) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unable to resolve variable '" +
+ Symbol->getName() + "'");
+ return;
+ }
return recordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
FixedValue);
}
@@ -337,11 +352,13 @@ void AArch64MachObjectWriter::recordRelocation(
Value +=
Layout.getSymbolOffset(*Symbol) - Layout.getSymbolOffset(*Base);
} else if (Symbol->isInSection()) {
- if (!CanUseLocalRelocation)
- Asm.getContext().reportFatalError(
+ if (!CanUseLocalRelocation) {
+ Asm.getContext().reportError(
Fixup.getLoc(),
"unsupported relocation of local symbol '" + Symbol->getName() +
"'. Must have non-local symbol earlier in section.");
+ return;
+ }
// Adjust the relocation to be section-relative.
// The index is the section ordinal (1-based).
const MCSection &Sec = Symbol->getSection();
@@ -361,9 +378,10 @@ void AArch64MachObjectWriter::recordRelocation(
return;
}
}
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
"unsupported relocation of variable '" +
Symbol->getName() + "'");
+ return;
}
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 52b000d..3e86a42 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -26,8 +26,9 @@ AArch64TargetStreamer::~AArch64TargetStreamer() {}
// The constant pool handling is shared by all AArch64TargetStreamer
// implementations.
const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr,
- unsigned Size) {
- return ConstantPools->addEntry(Streamer, Expr, Size);
+ unsigned Size,
+ SMLoc Loc) {
+ return ConstantPools->addEntry(Streamer, Expr, Size, Loc);
}
void AArch64TargetStreamer::emitCurrentConstantPool() {
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index fcc0d05..51432830 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -24,7 +24,7 @@ public:
/// Callback used to implement the ldr= pseudo.
/// Add a new entry to the constant pool for the current section and return an
/// MCExpr that can be used to refer to the constant pool location.
- const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size);
+ const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size, SMLoc Loc);
/// Callback used to implement the .ltorg directive.

/// Emit contents of constant pool for the current section.
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index ee85b65b..78f5289 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -146,11 +146,22 @@ const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings
// v8.1a "Privileged Access Never" extension-specific PStates
{"pan", PAN, {AArch64::HasV8_1aOps}},
+
+ // v8.2a
+ {"uao", UAO, {AArch64::HasV8_2aOps}},
};
AArch64PState::PStateMapper::PStateMapper()
: AArch64NamedImmMapper(PStateMappings, 0) {}
+const AArch64NamedImmMapper::Mapping AArch64PSBHint::PSBHintMapper::PSBHintMappings[] = {
+ // v8.2a "Statistical Profiling" extension-specific PSB operand
+ {"csync", CSync, {AArch64::FeatureSPE}},
+};
+
+AArch64PSBHint::PSBHintMapper::PSBHintMapper()
+ : AArch64NamedImmMapper(PSBHintMappings, 0) {}
+
const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"mdccsr_el0", MDCCSR_EL0, {}},
{"dbgdtrrx_el0", DBGDTRRX_EL0, {}},
@@ -192,6 +203,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
{"id_aa64isar1_el1", ID_A64ISAR1_EL1, {}},
{"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, {}},
{"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, {}},
+ {"id_aa64mmfr2_el1", ID_A64MMFR2_EL1, {AArch64::HasV8_2aOps}},
{"mvfr0_el1", MVFR0_EL1, {}},
{"mvfr1_el1", MVFR1_EL1, {}},
{"mvfr2_el1", MVFR2_EL1, {}},
@@ -275,9 +287,6 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
{"icc_sgi1r_el1", ICC_SGI1R_EL1, {}},
{"icc_asgi1r_el1", ICC_ASGI1R_EL1, {}},
{"icc_sgi0r_el1", ICC_SGI0R_EL1, {}},
-
- // v8.1a "Privileged Access Never" extension-specific system registers
- {"pan", PAN, {AArch64::HasV8_1aOps}},
};
AArch64SysReg::MSRMapper::MSRMapper() {
@@ -804,6 +813,24 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings
{"cntv_cval_el02", CNTV_CVAL_EL02, {AArch64::HasV8_1aOps}},
{"spsr_el12", SPSR_EL12, {AArch64::HasV8_1aOps}},
{"elr_el12", ELR_EL12, {AArch64::HasV8_1aOps}},
+
+ // v8.2a registers
+ {"uao", UAO, {AArch64::HasV8_2aOps}},
+
+ // v8.2a "Statistical Profiling extension" registers
+ {"pmblimitr_el1", PMBLIMITR_EL1, {AArch64::FeatureSPE}},
+ {"pmbptr_el1", PMBPTR_EL1, {AArch64::FeatureSPE}},
+ {"pmbsr_el1", PMBSR_EL1, {AArch64::FeatureSPE}},
+ {"pmbidr_el1", PMBIDR_EL1, {AArch64::FeatureSPE}},
+ {"pmscr_el2", PMSCR_EL2, {AArch64::FeatureSPE}},
+ {"pmscr_el12", PMSCR_EL12, {AArch64::FeatureSPE}},
+ {"pmscr_el1", PMSCR_EL1, {AArch64::FeatureSPE}},
+ {"pmsicr_el1", PMSICR_EL1, {AArch64::FeatureSPE}},
+ {"pmsirr_el1", PMSIRR_EL1, {AArch64::FeatureSPE}},
+ {"pmsfcr_el1", PMSFCR_EL1, {AArch64::FeatureSPE}},
+ {"pmsevfr_el1", PMSEVFR_EL1, {AArch64::FeatureSPE}},
+ {"pmslatfr_el1", PMSLATFR_EL1, {AArch64::FeatureSPE}},
+ {"pmsidr_el1", PMSIDR_EL1, {AArch64::FeatureSPE}},
};
uint32_t
diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 7e42f8e..f649cb9 100644
--- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -337,7 +337,9 @@ namespace AArch64AT {
S12E1R = 0x63c4, // 01 100 0111 1000 100
S12E1W = 0x63c5, // 01 100 0111 1000 101
S12E0R = 0x63c6, // 01 100 0111 1000 110
- S12E0W = 0x63c7 // 01 100 0111 1000 111
+ S12E0W = 0x63c7, // 01 100 0111 1000 111
+ S1E1RP = 0x43c8, // 01 000 0111 1001 000
+ S1E1WP = 0x43c9 // 01 000 0111 1001 001
};
struct ATMapper : AArch64NamedImmMapper {
@@ -463,6 +465,9 @@ namespace AArch64PState {
// v8.1a "Privileged Access Never" extension-specific PStates
PAN = 0x04,
+
+ // v8.2a "User Access Override" extension-specific PStates
+ UAO = 0x03
};
struct PStateMapper : AArch64NamedImmMapper {
@@ -473,6 +478,21 @@ namespace AArch64PState {
}
+namespace AArch64PSBHint {
+ enum PSBHintValues {
+ Invalid = -1,
+ // v8.2a "Statistical Profiling" extension-specific PSB operands
+ CSync = 0x11, // psb csync = hint #0x11
+ };
+
+ struct PSBHintMapper : AArch64NamedImmMapper {
+ const static Mapping PSBHintMappings[];
+
+ PSBHintMapper();
+ };
+
+}
+
namespace AArch64SE {
enum ShiftExtSpecifiers {
Invalid = -1,
@@ -594,6 +614,7 @@ namespace AArch64SysReg {
ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ ID_A64MMFR2_EL1 = 0xc03a, // 11 000 0000 0111 010
MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
@@ -1190,6 +1211,24 @@ namespace AArch64SysReg {
SPSR_EL12 = 0xea00, // 11 101 0100 0000 000
ELR_EL12 = 0xea01, // 11 101 0100 0000 001
+ // v8.2a registers
+ UAO = 0xc214, // 11 000 0100 0010 100
+
+ // v8.2a "Statistical Profiling extension" registers
+ PMBLIMITR_EL1 = 0xc4d0, // 11 000 1001 1010 000
+ PMBPTR_EL1 = 0xc4d1, // 11 000 1001 1010 001
+ PMBSR_EL1 = 0xc4d3, // 11 000 1001 1010 011
+ PMBIDR_EL1 = 0xc4d7, // 11 000 1001 1010 111
+ PMSCR_EL2 = 0xe4c8, // 11 100 1001 1001 000
+ PMSCR_EL12 = 0xecc8, // 11 101 1001 1001 000
+ PMSCR_EL1 = 0xc4c8, // 11 000 1001 1001 000
+ PMSICR_EL1 = 0xc4ca, // 11 000 1001 1001 010
+ PMSIRR_EL1 = 0xc4cb, // 11 000 1001 1001 011
+ PMSFCR_EL1 = 0xc4cc, // 11 000 1001 1001 100
+ PMSEVFR_EL1 = 0xc4cd, // 11 000 1001 1001 101
+ PMSLATFR_EL1 = 0xc4ce, // 11 000 1001 1001 110
+ PMSIDR_EL1 = 0xc4cf, // 11 000 1001 1001 111
+
// Cyclone specific system registers
CPM_IOACC_CTL_EL3 = 0xff90,
};
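The trailing comments on the new system-register constants spell out the op0:op1:CRn:CRm:op2 fields. Assuming the usual packing of those fields into a 16-bit value (op0 in bits 15:14 down to op2 in bits 2:0 — an inference from the comments, not something this patch defines), a small helper reproduces the listed encodings:

#include <cstdio>

// Pack op0:op1:CRn:CRm:op2 as spelled out in the comments above.
static unsigned encodeSysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                             unsigned CRm, unsigned Op2) {
  return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

int main() {
  // PMBLIMITR_EL1 is commented as "11 000 1001 1010 000" = 0xc4d0.
  std::printf("%#x\n", encodeSysReg(3, 0, 9, 10, 0));
  // UAO is commented as "11 000 0100 0010 100" = 0xc214.
  std::printf("%#x\n", encodeSysReg(3, 0, 4, 2, 4));
  return 0;
}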
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
index 0a05d25..8c3cb56 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -44,15 +44,21 @@ FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
-FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
+FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
-FunctionPass *createSIPrepareScratchRegs();
+
+ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
+void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
+extern char &AMDGPUAnnotateKernelFeaturesID;
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
+void initializeSIFixSGPRCopiesPass(PassRegistry &);
+extern char &SIFixSGPRCopiesID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -64,6 +70,8 @@ FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
ModulePass *createAMDGPUAlwaysInlinePass();
+ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
+FunctionPass *createAMDGPUAnnotateUniformValues();
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
extern char &SIFixControlFlowLiveIntervalsID;
@@ -71,6 +79,8 @@ extern char &SIFixControlFlowLiveIntervalsID;
void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
extern char &SIFixSGPRLiveRangesID;
+void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
+extern char &AMDGPUAnnotateUniformValuesPassID;
extern Target TheAMDGPUTarget;
extern Target TheGCNTarget;
@@ -85,8 +95,6 @@ enum TargetIndex {
};
}
-#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel"
-
} // End namespace llvm
namespace ShaderType {
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
index 68b5050..d4af8d2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -108,6 +108,11 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-fol
"true",
"Force using DS instruction immediate offsets on SI">;
+def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
+ "FlatForGlobal",
+ "true",
+ "Force to generate flat instruction for global">;
+
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"FlatAddressSpace",
"true",
@@ -272,9 +277,14 @@ def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
>, AssemblerPredicate<"FeatureGCN1Encoding">;
+def isVI : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN3Encoding">;
+
class PredicateControl {
Predicate SubtargetPredicate;
Predicate SIAssemblerPredicate = isSICI;
+ Predicate VIAssemblerPredicate = isVI;
list<Predicate> AssemblerPredicates = [];
Predicate AssemblerPredicate = TruePredicate;
list<Predicate> OtherPredicates = [];
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
new file mode 100644
index 0000000..3781839
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -0,0 +1,126 @@
+//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass adds target attributes to functions which use intrinsics
+/// which will impact calling convention lowering.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+
+#define DEBUG_TYPE "amdgpu-annotate-kernel-features"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAnnotateKernelFeatures : public ModulePass {
+private:
+ void addAttrToCallers(Function *Intrin, StringRef AttrName);
+ bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
+
+public:
+ static char ID;
+
+ AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
+ bool runOnModule(Module &M) override;
+ const char *getPassName() const override {
+ return "AMDGPU Annotate Kernel Features";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+};
+
+}
+
+char AMDGPUAnnotateKernelFeatures::ID = 0;
+
+char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
+
+
+INITIALIZE_PASS_BEGIN(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
+ "Add AMDGPU function attributes", false, false)
+INITIALIZE_PASS_END(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
+ "Add AMDGPU function attributes", false, false)
+
+
+void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin,
+ StringRef AttrName) {
+ SmallPtrSet<Function *, 4> SeenFuncs;
+
+ for (User *U : Intrin->users()) {
+ // CallInst is the only valid user for an intrinsic.
+ CallInst *CI = cast<CallInst>(U);
+
+ Function *CallingFunction = CI->getParent()->getParent();
+ if (SeenFuncs.insert(CallingFunction).second)
+ CallingFunction->addFnAttr(AttrName);
+ }
+}
+
+bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
+ Module &M,
+ ArrayRef<StringRef[2]> IntrinsicToAttr) {
+ bool Changed = false;
+
+ for (const StringRef *Arr : IntrinsicToAttr) {
+ if (Function *Fn = M.getFunction(Arr[0])) {
+ addAttrToCallers(Fn, Arr[1]);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
+ Triple TT(M.getTargetTriple());
+
+ static const StringRef IntrinsicToAttr[][2] = {
+ // .x omitted
+ { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" },
+ { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" },
+
+ // .x omitted
+ { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" },
+ { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" }
+
+ };
+
+ static const StringRef HSAIntrinsicToAttr[][2] = {
+ { "llvm.r600.read.local.size.x", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.local.size.y", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.local.size.z", "amdgpu-dispatch-ptr" },
+
+ { "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" },
+ { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" },
+ { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }
+ };
+
+ // TODO: Intrinsics that require queue ptr.
+
+ // We do not need to note the x workitem or workgroup id because they are
+ // always initialized.
+
+ bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
+ if (TT.getOS() == Triple::AMDHSA)
+ Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
+
+ return Changed;
+}
+
+ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
+ return new AMDGPUAnnotateKernelFeatures();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
new file mode 100644
index 0000000..dfddc34
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -0,0 +1,84 @@
+//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass adds amdgpu.uniform metadata to IR values so this information
+/// can be used during instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "amdgpu-annotate-uniform"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUAnnotateUniformValues : public FunctionPass,
+ public InstVisitor<AMDGPUAnnotateUniformValues> {
+ DivergenceAnalysis *DA;
+
+public:
+ static char ID;
+ AMDGPUAnnotateUniformValues() :
+ FunctionPass(ID) { }
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ const char *getPassName() const override { return "AMDGPU Annotate Uniform Values"; }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DivergenceAnalysis>();
+ AU.setPreservesAll();
+ }
+
+ void visitLoadInst(LoadInst &I);
+
+};
+
+} // End anonymous namespace
+
+INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
+ "Add AMDGPU uniform metadata", false, false)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
+ "Add AMDGPU uniform metadata", false, false)
+
+char AMDGPUAnnotateUniformValues::ID = 0;
+
+void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
+ Value *Ptr = I.getPointerOperand();
+ if (!DA->isUniform(Ptr))
+ return;
+
+ if (Instruction *PtrI = dyn_cast<Instruction>(Ptr))
+ PtrI->setMetadata("amdgpu.uniform", MDNode::get(I.getContext(), {}));
+
+}
+
+bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
+ return false;
+}
+
+bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
+ DA = &getAnalysis<DivergenceAnalysis>();
+ visit(F);
+
+ return true;
+}
+
+FunctionPass *
+llvm::createAMDGPUAnnotateUniformValues() {
+ return new AMDGPUAnnotateUniformValues();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0a5309b..ba71dc0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -100,14 +100,63 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
}
}
-void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
-
- // This label is used to mark the end of the .text section.
- const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- OutStreamer->SwitchSection(TLOF.getTextSection());
- MCSymbol *EndOfTextLabel =
- OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- OutStreamer->EmitLabel(EndOfTextLabel);
+void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
+ if (MFI->isKernel() && STM.isAmdHsaOS()) {
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(),
+ ELF::STT_AMDGPU_HSA_KERNEL);
+ }
+
+ AsmPrinter::EmitFunctionEntryLabel();
+}
+
+static bool isModuleLinkage(const GlobalValue *GV) {
+ switch (GV->getLinkage()) {
+ case GlobalValue::InternalLinkage:
+ case GlobalValue::CommonLinkage:
+ return true;
+ case GlobalValue::ExternalLinkage:
+ return false;
+ default: llvm_unreachable("unknown linkage type");
+ }
+}
+
+void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA) {
+ AsmPrinter::EmitGlobalVariable(GV);
+ return;
+ }
+
+ if (GV->isDeclaration() || GV->getLinkage() == GlobalValue::PrivateLinkage) {
+ AsmPrinter::EmitGlobalVariable(GV);
+ return;
+ }
+
+ // Group segment variables aren't emitted in HSA.
+ if (AMDGPU::isGroupSegment(GV))
+ return;
+
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ if (isModuleLinkage(GV)) {
+ TS->EmitAMDGPUHsaModuleScopeGlobal(GV->getName());
+ } else {
+ TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName());
+ }
+
+ const DataLayout &DL = getDataLayout();
+ OutStreamer->PushSection();
+ OutStreamer->SwitchSection(
+ getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
+ MCSymbol *GVSym = getSymbol(GV);
+ const Constant *C = GV->getInitializer();
+ OutStreamer->EmitLabel(GVSym);
+ EmitGlobalConstant(DL, C);
+ OutStreamer->PopSection();
}
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -125,8 +174,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ getSIProgramInfo(KernelInfo, MF);
if (!STM.isAmdHsaOS()) {
- getSIProgramInfo(KernelInfo, MF);
EmitProgramInfoSI(MF, KernelInfo);
}
// Emit directives
@@ -165,6 +214,23 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
false);
+
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
+ Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
+ Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
+ Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
+ Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
+ Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)),
+ false);
+
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer->emitRawComment(
@@ -278,27 +344,30 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
unsigned width = 0;
bool isSGPR = false;
- if (!MO.isReg()) {
+ if (!MO.isReg())
continue;
- }
+
unsigned reg = MO.getReg();
- if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO ||
- reg == AMDGPU::VCC_HI) {
+ switch (reg) {
+ case AMDGPU::EXEC:
+ case AMDGPU::SCC:
+ case AMDGPU::M0:
+ continue;
+
+ case AMDGPU::VCC:
+ case AMDGPU::VCC_LO:
+ case AMDGPU::VCC_HI:
VCCUsed = true;
continue;
- } else if (reg == AMDGPU::FLAT_SCR ||
- reg == AMDGPU::FLAT_SCR_LO ||
- reg == AMDGPU::FLAT_SCR_HI) {
+
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::FLAT_SCR_LO:
+ case AMDGPU::FLAT_SCR_HI:
FlatUsed = true;
continue;
- }
- switch (reg) {
- default: break;
- case AMDGPU::SCC:
- case AMDGPU::EXEC:
- case AMDGPU::M0:
- continue;
+ default:
+ break;
}
if (AMDGPU::SReg_32RegClass.contains(reg)) {
@@ -348,11 +417,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
}
- if (VCCUsed)
+ if (VCCUsed || FlatUsed)
MaxSGPR += 2;
- if (FlatUsed)
+ if (FlatUsed) {
MaxSGPR += 2;
+ // 2 additional for VI+.
+ if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ MaxSGPR += 2;
+ }
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
@@ -368,6 +441,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
}
+ if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ Ctx.emitError("too many user SGPRs used");
+ }
+
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
@@ -419,18 +497,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
S_00B848_FLOAT_MODE(ProgInfo.FloatMode) |
S_00B848_PRIV(ProgInfo.Priv) |
S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) |
- S_00B848_IEEE_MODE(ProgInfo.DebugMode) |
+ S_00B848_DEBUG_MODE(ProgInfo.DebugMode) |
S_00B848_IEEE_MODE(ProgInfo.IEEEMode);
+ // 0 = X, 1 = XY, 2 = XYZ
+ unsigned TIDIGCompCnt = 0;
+ if (MFI->hasWorkItemIDZ())
+ TIDIGCompCnt = 2;
+ else if (MFI->hasWorkItemIDY())
+ TIDIGCompCnt = 1;
+
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
- S_00B84C_USER_SGPR(MFI->NumUserSGPRs) |
- S_00B84C_TGID_X_EN(1) |
- S_00B84C_TGID_Y_EN(1) |
- S_00B84C_TGID_Z_EN(1) |
- S_00B84C_TG_SIZE_EN(1) |
- S_00B84C_TIDIG_COMP_CNT(2) |
- S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks);
+ S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
+ S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
+ S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
+ S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
+ S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
+ S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
+ S_00B84C_EXCP_EN_MSB(0) |
+ S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
+ S_00B84C_EXCP_EN(0);
}
static unsigned getRsrcReg(unsigned ShaderType) {
@@ -491,14 +578,53 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
- header.code_properties =
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
- AMD_CODE_PROPERTY_IS_PTR64;
+ header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+
+ if (MFI->hasPrivateSegmentBuffer()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
+ }
+
+ if (MFI->hasDispatchPtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+
+ if (MFI->hasQueuePtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+
+ if (MFI->hasKernargSegmentPtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+
+ if (MFI->hasDispatchID())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+
+ if (MFI->hasFlatScratchInit())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
+
+ // TODO: Private segment size
+
+ if (MFI->hasGridWorkgroupCountX()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
+ }
+
+ if (MFI->hasGridWorkgroupCountY()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
+ }
+
+ if (MFI->hasGridWorkgroupCountZ()) {
+ header.code_properties |=
+ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
+ }
+
+ if (MFI->hasDispatchPtr())
+ header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
-
+ header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+ header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 345af9b..817cbfc 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -99,7 +99,9 @@ public:
void EmitFunctionBodyStart() override;
- void EmitEndOfAsmFile(Module &M) override;
+ void EmitFunctionEntryLabel() override;
+
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp
new file mode 100644
index 0000000..2f6b302
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp
@@ -0,0 +1,26 @@
+//===-- AMDGPUDiagnosticInfoUnsupported.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUDiagnosticInfoUnsupported.h"
+
+using namespace llvm;
+
+DiagnosticInfoUnsupported::DiagnosticInfoUnsupported(
+ const Function &Fn,
+ const Twine &Desc,
+ DiagnosticSeverity Severity)
+ : DiagnosticInfo(getKindID(), Severity),
+ Description(Desc),
+ Fn(Fn) { }
+
+int DiagnosticInfoUnsupported::KindID = 0;
+
+void DiagnosticInfoUnsupported::print(DiagnosticPrinter &DP) const {
+ DP << "unsupported " << getDescription() << " in " << Fn.getName();
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h
new file mode 100644
index 0000000..0fd37e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h
@@ -0,0 +1,48 @@
+//===-- AMDGPUDiagnosticInfoUnsupported.h - Error reporting -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+
+namespace llvm {
+
+/// Diagnostic information for unimplemented or unsupported feature reporting.
+class DiagnosticInfoUnsupported : public DiagnosticInfo {
+private:
+ const Twine &Description;
+ const Function &Fn;
+
+ static int KindID;
+
+ static int getKindID() {
+ if (KindID == 0)
+ KindID = llvm::getNextAvailablePluginDiagnosticKind();
+ return KindID;
+ }
+
+public:
+ DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
+ DiagnosticSeverity Severity = DS_Error);
+
+ const Function &getFunction() const { return Fn; }
+ const Twine &getDescription() const { return Description; }
+
+ void print(DiagnosticPrinter &DP) const override;
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == getKindID();
+ }
+};
+
+}
+
+#endif
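For reference, later hunks in this patch report errors through this class (for example for dynamic alloca and addrspacecast). A rough usage sketch, assuming an llvm::Function &Fn is in scope:

    // Sketch only: raise the diagnostic through the function's LLVMContext;
    // the default severity is DS_Error (see the constructor above).
    DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "dynamic alloca");
    Fn.getContext().diagnose(NoDynamicAlloca);
    // print() renders this as: "unsupported dynamic alloca in <function name>"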
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index 8175786..4d84d28 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -71,9 +71,15 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
}
/// \returns The number of registers allocated for \p FI.
-int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // Fill in FrameReg output argument.
+ FrameReg = RI->getFrameRegister(MF);
+
// Start the offset at 2 so we don't overwrite work group information.
// XXX: We should only do this when the shader actually uses this
// information.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 9f31be1..257a3da 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -8,14 +8,12 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// \brief Interface to describe a layout of a stack frame on a AMDIL target
-/// machine.
+/// \brief Interface to describe a layout of a stack frame on an AMDGPU target.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
-#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -34,7 +32,8 @@ public:
/// \returns The number of 32-bit sub-registers that are used when storing
/// values to the stack.
unsigned getStackWidth(const MachineFunction &MF) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
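Callers of the old getFrameIndexOffset() adapt by passing a frame-register out-parameter; the AMDGPUInstrInfo.cpp hunk further down in this patch follows the same pattern. A minimal sketch, where TFL, MF and FI stand for whatever frame lowering, machine function and frame index the call site already has:

    // Sketch of an adapted call site; the frame register output can be
    // ignored when only the offset is of interest.
    unsigned IgnoredFrameReg;
    int Offset = TFL->getFrameIndexReference(MF, FI, IgnoredFrameReg);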
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 64c54cc..b33040b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,6 +11,8 @@
/// \brief Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//
+
+#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
@@ -20,9 +22,9 @@
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"
@@ -40,12 +42,14 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget *Subtarget;
+
public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
bool runOnMachineFunction(MachineFunction &MF) override;
SDNode *Select(SDNode *N) override;
const char *getPassName() const override;
+ void PreprocessISelDAG() override;
void PostprocessISelDAG() override;
private:
@@ -91,7 +95,7 @@ private:
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
- void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
SDValue &TFE) const;
@@ -108,6 +112,16 @@ private:
SDValue &TFE) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset, SDValue &GLC) const;
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+ bool &Imm) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+ bool &Imm) const;
+ bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
SDNode *SelectAddrSpaceCast(SDNode *N);
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
@@ -273,6 +287,23 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
return N;
}
+static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
+ switch (NumVectorElts) {
+ case 1:
+ return AMDGPU::SReg_32RegClassID;
+ case 2:
+ return AMDGPU::SReg_64RegClassID;
+ case 4:
+ return AMDGPU::SReg_128RegClassID;
+ case 8:
+ return AMDGPU::SReg_256RegClassID;
+ case 16:
+ return AMDGPU::SReg_512RegClassID;
+ }
+
+ llvm_unreachable("invalid vector size");
+}
+
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -306,38 +337,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsEq(MVT::i32));
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- bool UseVReg = true;
- for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- U != E; ++U) {
- if (!U->isMachineOpcode()) {
- continue;
- }
- const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
- if (!RC) {
- continue;
- }
- if (static_cast<const SIRegisterInfo *>(TRI)->isSGPRClass(RC)) {
- UseVReg = false;
- }
- }
- switch(NumVectorElts) {
- case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID :
- AMDGPU::SReg_32RegClassID;
- break;
- case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID :
- AMDGPU::SReg_64RegClassID;
- break;
- case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID :
- AMDGPU::SReg_128RegClassID;
- break;
- case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID :
- AMDGPU::SReg_256RegClassID;
- break;
- case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID :
- AMDGPU::SReg_512RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
+ RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
} else {
// BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
// that adds a 128 bits reg copy when going through TwoAddressInstructions
@@ -455,98 +455,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
N->getValueType(0), Ops);
}
-
- case ISD::LOAD: {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- SDLoc SL(N);
- EVT VT = N->getValueType(0);
-
- if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) {
- N = glueCopyToM0(N);
- break;
- }
-
- // To simplify the TableGen patters, we replace all i64 loads with
- // v2i32 loads. Alternatively, we could promote i64 loads to v2i32
- // during DAG legalization, however, so places (ExpandUnalignedLoad)
- // in the DAG legalizer assume that if i64 is legal, so doing this
- // promotion early can cause problems.
-
- SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(),
- LD->getBasePtr(), LD->getMemOperand());
- SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
- MVT::i64, NewLoad);
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1));
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast);
- SDNode *Load = glueCopyToM0(NewLoad.getNode());
- SelectCode(Load);
- N = BitCast.getNode();
- break;
- }
-
+ case ISD::LOAD:
case ISD::STORE: {
- // Handle i64 stores here for the same reason mentioned above for loads.
- StoreSDNode *ST = cast<StoreSDNode>(N);
- SDValue Value = ST->getValue();
- if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) {
-
- SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
- MVT::v2i32, Value);
- SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
- ST->getBasePtr(), ST->getMemOperand());
-
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
-
- if (NewValue.getOpcode() == ISD::BITCAST) {
- Select(NewStore.getNode());
- return SelectCode(NewValue.getNode());
- }
-
- // getNode() may fold the bitcast if its input was another bitcast. If that
- // happens we should only select the new store.
- N = NewStore.getNode();
- }
-
N = glueCopyToM0(N);
break;
}
- case AMDGPUISD::REGISTER_LOAD: {
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- break;
- SDValue Addr, Offset;
-
- SDLoc DL(N);
- SelectADDRIndirect(N->getOperand(1), Addr, Offset);
- const SDValue Ops[] = {
- Addr,
- Offset,
- CurDAG->getTargetConstant(0, DL, MVT::i32),
- N->getOperand(0),
- };
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL,
- CurDAG->getVTList(MVT::i32, MVT::i64,
- MVT::Other),
- Ops);
- }
- case AMDGPUISD::REGISTER_STORE: {
- if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
- break;
- SDValue Addr, Offset;
- SelectADDRIndirect(N->getOperand(2), Addr, Offset);
- SDLoc DL(N);
- const SDValue Ops[] = {
- N->getOperand(1),
- Addr,
- Offset,
- CurDAG->getTargetConstant(0, DL, MVT::i32),
- N->getOperand(0),
- };
- return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL,
- CurDAG->getVTList(MVT::Other),
- Ops);
- }
-
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
@@ -575,7 +489,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N),
N->getOperand(0), OffsetVal, WidthVal);
-
}
case AMDGPUISD::DIV_SCALE: {
return SelectDIV_SCALE(N);
@@ -601,7 +514,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
-
bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
assert(AS != 0 && "Use checkPrivateAddress instead.");
if (!Ptr)
@@ -681,7 +593,7 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
if (checkPrivateAddress(N->getMemOperand())) {
if (MMO) {
const PseudoSourceValue *PSV = MMO->getPseudoValue();
- if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ if (PSV && PSV->isConstantPool()) {
return true;
}
}
@@ -847,7 +759,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
+ // omod
SDValue Ops[8];
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
@@ -883,15 +796,39 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
Offset = N1;
return true;
}
- }
+ } else if (Addr.getOpcode() == ISD::SUB) {
+ // sub C, x -> add (sub 0, x), C
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+ int64_t ByteOffset = C->getSExtValue();
+ if (isUInt<16>(ByteOffset)) {
+ SDLoc DL(Addr);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ // XXX - This is kind of hacky. Create a dummy sub node so we can check
+ // the known bits in isDSOffsetLegal. We need to emit the selected node
+ // here, so this is thrown away.
+ SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
+ MachineSDNode *MachineSub
+ = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ Base = SDValue(MachineSub, 0);
+ Offset = Addr.getOperand(0);
+ return true;
+ }
+ }
+ }
+ } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ // If we have a constant address, prefer to put the constant into the
+ // offset. This can save moves to load the constant address since multiple
+ // operations can share the zero base address register, and enables merging
+ // into read2 / write2 instructions.
- SDLoc DL(Addr);
+ SDLoc DL(Addr);
- // If we have a constant address, prefer to put the constant into the
- // offset. This can save moves to load the constant address since multiple
- // operations can share the zero base address register, and enables merging
- // into read2 / write2 instructions.
- if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
if (isUInt<16>(CAddr->getZExtValue())) {
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
@@ -904,10 +841,11 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
// default case
Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
return true;
}
+// TODO: If the offset is too big, put the low 16 bits into the offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
SDValue &Offset1) const {
@@ -926,9 +864,35 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
return true;
}
- }
-
- if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+ } else if (Addr.getOpcode() == ISD::SUB) {
+ // sub C, x -> add (sub 0, x), C
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
+ unsigned DWordOffset0 = C->getZExtValue() / 4;
+ unsigned DWordOffset1 = DWordOffset0 + 1;
+
+ if (isUInt<8>(DWordOffset0)) {
+ SDLoc DL(Addr);
+ SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ // XXX - This is kind of hacky. Create a dummy sub node so we can check
+ // the known bits in isDSOffsetLegal. We need to emit the selected node
+ // here, so this is thrown away.
+ SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
+ MachineSDNode *MachineSub
+ = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32,
+ Zero, Addr.getOperand(1));
+
+ Base = SDValue(MachineSub, 0);
+ Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
+ Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
+ return true;
+ }
+ }
+ }
+ } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
unsigned DWordOffset1 = DWordOffset0 + 1;
assert(4 * DWordOffset0 == CAddr->getZExtValue());
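To make the "sub C, x -> add (sub 0, x), C" rewrite in the two hunks above concrete: an address of the form (64 - x) is selected as one V_SUB_I32 of 0 and x for the base, with the constant 64 carried in the instruction's offset field. A tiny standalone sketch of the width check only, leaving out the known-bits query that isDSOffsetLegal performs in the real code:

    #include <cstdint>
    #include <cstdio>

    // Sketch only: the folded constant must fit the unsigned offset field
    // (16 bits for the single-offset form, 8 bits per dword slot above).
    static bool fitsUnsigned(int64_t Value, unsigned Bits) {
      return Value >= 0 && Value < (int64_t(1) << Bits);
    }

    int main() {
      std::printf("64 bytes in 16 bits: %d\n", fitsUnsigned(64, 16));      // 1
      std::printf("64/4 dwords in 8 bits: %d\n", fitsUnsigned(64 / 4, 8)); // 1
      return 0;
    }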
@@ -956,12 +920,16 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
return isUInt<12>(Imm->getZExtValue());
}
-void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
+bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
SDValue &VAddr, SDValue &SOffset,
SDValue &Offset, SDValue &Offen,
SDValue &Idxen, SDValue &Addr64,
SDValue &GLC, SDValue &SLC,
SDValue &TFE) const {
+  // Subtarget prefers to use flat instructions for global memory access.
+ if (Subtarget->useFlatForGlobal())
+ return false;
+
SDLoc DL(Addr);
GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -994,14 +962,14 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
if (isLegalMUBUFImmOffset(C1)) {
Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
- return;
+ return true;
} else if (isUInt<32>(C1->getZExtValue())) {
// Illegal offset, store it in soffset.
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
0);
- return;
+ return true;
}
}
@@ -1013,7 +981,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Ptr = N0;
VAddr = N1;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
- return;
+ return true;
}
// default case -> offset
@@ -1021,6 +989,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Ptr = Addr;
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ return true;
}
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
@@ -1033,8 +1002,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return false;
- SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE);
+ if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE))
+ return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
if (C->getSExtValue()) {
@@ -1052,8 +1022,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset,
- SDValue &SLC) const {
+ SDValue &Offset,
+ SDValue &SLC) const {
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
SDValue GLC, TFE;
@@ -1066,36 +1036,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDLoc DL(Addr);
MachineFunction &MF = CurDAG->getMachineFunction();
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- unsigned ScratchOffsetReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
- Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
- ScratchOffsetReg, MVT::i32);
- SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
- SDValue ScratchRsrcDword0 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
- SDValue ScratchRsrcDword1 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
-
- const SDValue RsrcOps[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- ScratchRsrcDword0,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- ScratchRsrcDword1,
- CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
- };
- SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::v2i32, RsrcOps), 0);
- Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
- SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
- MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
+ Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
+ SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32);
// (add n0, c1)
if (CurDAG->isBaseWithConstantOffset(Addr)) {
@@ -1126,8 +1070,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
- SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- GLC, SLC, TFE);
+ if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
+ GLC, SLC, TFE))
+ return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
!cast<ConstantSDNode>(Idxen)->getSExtValue() &&
@@ -1153,18 +1098,134 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}
+///
+/// \param EncodedOffset This is the immediate value that will be encoded
+/// directly into the instruction. On SI/CI the \p EncodedOffset
+/// will be in units of dwords and on VI+ it will be in units of bytes.
+static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
+ int64_t EncodedOffset) {
+ return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
+ isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
+ SDValue &Offset, bool &Imm) const {
+
+ // FIXME: Handle non-constant offsets.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
+ if (!C)
+ return false;
+
+ SDLoc SL(ByteOffsetNode);
+ AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
+ int64_t ByteOffset = C->getSExtValue();
+ int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
+ ByteOffset >> 2 : ByteOffset;
+
+ if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
+ Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+ Imm = true;
+ return true;
+ }
+
+ if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
+ return false;
+
+ if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
+ // 32-bit Immediates are supported on Sea Islands.
+ Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
+ } else {
+ SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
+ Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
+ C32Bit), 0);
+ }
+ Imm = false;
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
+ SDValue &Offset, bool &Imm) const {
+
+ SDLoc SL(Addr);
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+
+ if (SelectSMRDOffset(N1, Offset, Imm)) {
+ SBase = N0;
+ return true;
+ }
+ }
+ SBase = Addr;
+ Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+ Imm = true;
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const {
+
+ if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ bool Imm;
+ if (!SelectSMRD(Addr, SBase, Offset, Imm))
+ return false;
+
+ return !Imm && isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
+ !isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
+ SDValue &Offset) const {
+ if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ bool Imm;
+ if (!SelectSMRDOffset(Addr, Offset, Imm))
+ return false;
+
+ return !Imm && isa<ConstantSDNode>(Offset);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr,
+ SDValue &Offset) const {
+ bool Imm;
+ return SelectSMRDOffset(Addr, Offset, Imm) && !Imm &&
+ !isa<ConstantSDNode>(Offset);
+}
+
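A worked instance of the SMRD offset rules implemented above (a standalone restatement, not taken from the patch): SI/CI encode the offset in dwords with an 8-bit field, while VI+ encode it in bytes with a 20-bit field, so a 1024-byte offset (256 dwords) is rejected on SI/CI but accepted on VI.

    #include <cstdint>
    #include <cstdio>

    // Plain-arithmetic restatement of isLegalSMRDImmOffset and the byte/dword
    // conversion done in SelectSMRDOffset.
    static bool legalSMRDImm(bool IsVIPlus, int64_t ByteOffset) {
      int64_t Encoded = IsVIPlus ? ByteOffset : ByteOffset >> 2;
      int64_t Limit = IsVIPlus ? (int64_t(1) << 20) : (int64_t(1) << 8);
      return Encoded >= 0 && Encoded < Limit;
    }

    int main() {
      std::printf("1024-byte offset: SI/CI=%d VI=%d\n",
                  legalSMRDImm(false, 1024), legalSMRDImm(true, 1024));
      return 0;
    }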
// FIXME: This is incorrect and only enough to be able to compile.
SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
SDLoc DL(N);
+ const MachineFunction &MF = CurDAG->getMachineFunction();
+ DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(),
+ "addrspacecast not implemented");
+ CurDAG->getContext()->diagnose(NotImplemented);
+
assert(Subtarget->hasFlatAddressSpace() &&
"addrspacecast only supported with flat address space!");
- assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) &&
- "Cannot cast address space to / from constant address!");
-
assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) &&
"Can only cast to / from flat address space!");
@@ -1190,7 +1251,6 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32));
}
-
if (DestSize > SrcSize) {
assert(SrcSize == 32 && DestSize == 64);
@@ -1371,6 +1431,65 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
+ bool Modified = false;
+
+ // XXX - Other targets seem to be able to do this without a worklist.
+ SmallVector<LoadSDNode *, 8> LoadsToReplace;
+ SmallVector<StoreSDNode *, 8> StoresToReplace;
+
+ for (SDNode &Node : CurDAG->allnodes()) {
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) {
+ EVT VT = LD->getValueType(0);
+ if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ continue;
+
+      // To simplify the TableGen patterns, we replace all i64 loads with v2i32
+      // loads. Alternatively, we could promote i64 loads to v2i32 during DAG
+      // legalization; however, some places in the DAG legalizer (such as
+      // ExpandUnalignedLoad) assume that if i64 is legal, i64 loads are too,
+      // so doing this promotion that early can cause problems.
+ LoadsToReplace.push_back(LD);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) {
+ // Handle i64 stores here for the same reason mentioned above for loads.
+ SDValue Value = ST->getValue();
+ if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
+ continue;
+ StoresToReplace.push_back(ST);
+ }
+ }
+
+ for (LoadSDNode *LD : LoadsToReplace) {
+ SDLoc SL(LD);
+
+ SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(),
+ LD->getBasePtr(), LD->getMemOperand());
+ SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL,
+ MVT::i64, NewLoad);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast);
+ Modified = true;
+ }
+
+ for (StoreSDNode *ST : StoresToReplace) {
+ SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST),
+ MVT::v2i32, ST->getValue());
+ const SDValue StoreOps[] = {
+ ST->getChain(),
+ NewValue,
+ ST->getBasePtr(),
+ ST->getOffset()
+ };
+
+ CurDAG->UpdateNodeOperands(ST, StoreOps);
+ Modified = true;
+ }
+
+ // XXX - Is this necessary?
+ if (Modified)
+ CurDAG->RemoveDeadNodes();
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3a65f3b..222f631 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
@@ -27,50 +28,9 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DiagnosticPrinter.h"
using namespace llvm;
-namespace {
-
-/// Diagnostic information for unimplemented or unsupported feature reporting.
-class DiagnosticInfoUnsupported : public DiagnosticInfo {
-private:
- const Twine &Description;
- const Function &Fn;
-
- static int KindID;
-
- static int getKindID() {
- if (KindID == 0)
- KindID = llvm::getNextAvailablePluginDiagnosticKind();
- return KindID;
- }
-
-public:
- DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
- DiagnosticSeverity Severity = DS_Error)
- : DiagnosticInfo(getKindID(), Severity),
- Description(Desc),
- Fn(Fn) { }
-
- const Function &getFunction() const { return Fn; }
- const Twine &getDescription() const { return Description; }
-
- void print(DiagnosticPrinter &DP) const override {
- DP << "unsupported " << getDescription() << " in " << Fn.getName();
- }
-
- static bool classof(const DiagnosticInfo *DI) {
- return DI->getKind() == getKindID();
- }
-};
-
-int DiagnosticInfoUnsupported::KindID = 0;
-}
-
-
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
@@ -113,6 +73,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ // This is totally unsupported, just custom lower to produce an error.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -352,7 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Custom);
- setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
@@ -429,12 +392,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setSelectIsExpensive(false);
PredictableSelectIsExpensive = false;
- // There are no integer divide instructions, and these expand to a pretty
- // large sequence of instructions.
- setIntDivIsCheap(false);
- setPow2SDivIsCheap(false);
setFsqrtIsCheap(true);
+ // We want to find all load dependencies for long chains of stores to enable
+ // merging into very wide vectors. The problem is with vectors with > 4
+ // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
+  // vectors are a legal type, even though we usually have to split the
+  // loads. When we can more precisely specify load legality per address
+ // space, we should be able to make FindBetterChain/MergeConsecutiveStores
+ // smarter so that they can figure out what to do in 2 iterations without all
+ // N > 4 stores on the same chain.
+ GatherAllAliasesMaxDepth = 16;
+
// FIXME: Need to really handle these.
MaxStoresPerMemcpy = 4096;
MaxStoresPerMemmove = 4096;
@@ -534,6 +503,18 @@ bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
return true;
}
+bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
+ // There are few operations which truly have vector input operands. Any vector
+ // operation is going to involve operations on each component, and a
+ // build_vector will be a copy per element, so it always makes sense to use a
+ // build_vector input in place of the extracted element to avoid a copy into a
+ // super register.
+ //
+  // We should probably only do this if all users are extracts, but this
+ // should be the common case.
+ return true;
+}
+
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
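A quick check of that rule (sketch, not part of the patch): truncating i64 to i32 is free because the result is just the low 32-bit subregister, while i64 to i16 is not, since 16 is not a multiple of 32.

    #include <cstdio>

    // Mirrors the predicate: the destination must be narrower and a whole
    // number of 32-bit subregisters.
    static bool truncateIsFree(unsigned SrcBits, unsigned DstBits) {
      return DstBits < SrcBits && (DstBits % 32 == 0);
    }

    int main() {
      std::printf("i64->i32: %d  i64->i16: %d\n",
                  truncateIsFree(64, 32), truncateIsFree(64, 16));
      return 0;
    }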
@@ -617,6 +598,15 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
return SDValue();
}
+SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+
+ DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "dynamic alloca");
+ DAG.getContext()->diagnose(NoDynamicAlloca);
+ return SDValue();
+}
+
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -643,6 +633,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return Op;
}
@@ -892,7 +883,9 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
+ unsigned IgnoredFrameReg;
+ unsigned Offset =
+ TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
Op.getValueType());
}
@@ -1043,9 +1036,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2));
- case AMDGPUIntrinsic::AMDGPU_brev:
- return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
-
case Intrinsic::AMDGPU_class:
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
Op.getOperand(1), Op.getOperand(2));
@@ -1057,6 +1047,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_brev: // Legacy name
+ return DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(1));
}
}
@@ -1077,6 +1069,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
+ // TODO: Should this propagate fast-math-flags?
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
DAG.getConstantFP(1.0f, DL, MVT::f32),
Op.getOperand(1));
@@ -1167,45 +1160,6 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
return SDValue();
}
-// FIXME: Remove this when combines added to DAGCombiner.
-SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- switch (CCOpcode) {
- case ISD::SETULE:
- case ISD::SETULT: {
- unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETLE:
- case ISD::SETLT: {
- unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETGT:
- case ISD::SETGE: {
- unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- case ISD::SETUGE:
- case ISD::SETUGT: {
- unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN;
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
- default:
- return SDValue();
- }
-}
-
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -1260,7 +1214,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
EVT PtrVT = BasePtr.getValueType();
EVT MemVT = Load->getMemoryVT();
SDLoc SL(Op);
- MachinePointerInfo SrcValue(Load->getMemOperand()->getValue());
+
+ const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
EVT LoVT, HiVT;
EVT LoMemVT, HiMemVT;
@@ -1269,23 +1224,27 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT);
+
+ unsigned Size = LoMemVT.getStoreSize();
+ unsigned BaseAlign = Load->getAlignment();
+ unsigned HiAlign = MinAlign(BaseAlign, Size);
+
SDValue LoLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
Load->getChain(), BasePtr,
SrcValue,
LoMemVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
+ Load->isInvariant(), BaseAlign);
SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(LoMemVT.getStoreSize(), SL,
- PtrVT));
+ DAG.getConstant(Size, SL, PtrVT));
SDValue HiLoad
= DAG.getExtLoad(Load->getExtensionType(), SL, HiVT,
Load->getChain(), HiPtr,
SrcValue.getWithOffset(LoMemVT.getStoreSize()),
HiMemVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->isInvariant(), Load->getAlignment());
+ Load->isInvariant(), HiAlign);
SDValue Ops[] = {
DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad),
@@ -1415,7 +1374,11 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
DAG.getConstant(LoMemVT.getStoreSize(), SL,
PtrVT));
- MachinePointerInfo SrcValue(Store->getMemOperand()->getValue());
+ const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
+ unsigned BaseAlign = Store->getAlignment();
+ unsigned Size = LoMemVT.getStoreSize();
+ unsigned HiAlign = MinAlign(BaseAlign, Size);
+
SDValue LoStore
= DAG.getTruncStore(Chain, SL, Lo,
BasePtr,
@@ -1423,15 +1386,15 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
LoMemVT,
Store->isNonTemporal(),
Store->isVolatile(),
- Store->getAlignment());
+ BaseAlign);
SDValue HiStore
= DAG.getTruncStore(Chain, SL, Hi,
HiPtr,
- SrcValue.getWithOffset(LoMemVT.getStoreSize()),
+ SrcValue.getWithOffset(Size),
HiMemVT,
Store->isNonTemporal(),
Store->isVolatile(),
- Store->getAlignment());
+ HiAlign);
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
}
@@ -1529,7 +1492,7 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
Store->getValue().getValueType().isVector()) {
- return ScalarizeVectorStore(Op, DAG);
+ return SplitVectorStore(Op, DAG);
}
EVT MemVT = Store->getMemoryVT();
@@ -1630,6 +1593,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool
// float fb = (float)ib;
SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
+ // TODO: Should this propagate fast-math-flags?
// float fq = native_divide(fa, fb);
SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
@@ -1940,6 +1904,8 @@ SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y);
SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y);
@@ -1968,6 +1934,7 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
@@ -2045,6 +2012,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
@@ -2074,6 +2043,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const
SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+ // TODO: Should this propagate fast-math-flags?
+
SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
@@ -2184,6 +2155,7 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
@@ -2206,7 +2178,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
DAG.getConstant(32, SL, MVT::i32));
-
+ // TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
}
@@ -2231,6 +2203,7 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
DAG.getConstant(1, DL, MVT::i32));
SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
+ // TODO: Should this propagate fast-math-flags?
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
@@ -2257,7 +2230,7 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
MVT::f64);
SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL,
MVT::f64);
-
+ // TODO: Should this propagate fast-math-flags?
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0);
SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul);
@@ -2511,12 +2484,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
if (VT == MVT::f32)
return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
-
- // TODO: Implement min / max Evergreen instructions.
- if (VT == MVT::i32 &&
- Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
- return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
- }
}
break;
@@ -2652,20 +2619,14 @@ bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->isExactlyValue(1.0);
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isAllOnesValue();
- }
- return false;
+ return isAllOnesConstant(Op);
}
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
return CFP->getValueAPF().isZero();
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- return C->isNullValue();
- }
- return false;
+ return isNullConstant(Op);
}
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
@@ -2738,7 +2699,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
- NODE_NAME_CASE(BREV)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
@@ -2893,8 +2853,7 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
unsigned SignBits = 32 - Width->getZExtValue() + 1;
- ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Offset || !Offset->isNullValue())
+ if (!isNullConstant(Op.getOperand(1)))
return SignBits;
// TODO: Could probably figure something out with non-0 offsets.
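To illustrate the count used above: a signed bitfield extract (BFE_I32) of width 8 sign-extends an 8-bit field, so at least 32 - 8 + 1 = 25 of the result's top bits are copies of the sign bit. A trivial sketch of that arithmetic:

    #include <cstdio>

    int main() {
      unsigned Width = 8;                  // hypothetical BFE width operand
      unsigned SignBits = 32 - Width + 1;  // 25 bits known equal to the sign bit
      std::printf("known sign bits: %u\n", SignBits);
      return 0;
    }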
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 478b203..7314cc0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -138,6 +138,7 @@ public:
bool storeOfVectorConstantIsCheap(EVT MemVT,
unsigned NumElem,
unsigned AS) const override;
+ bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
@@ -149,6 +150,9 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const;
+
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
void ReplaceNodeResults(SDNode * N,
@@ -165,14 +169,6 @@ public:
SDValue False,
SDValue CC,
DAGCombinerInfo &DCI) const;
- SDValue CombineIMinMax(SDLoc DL,
- EVT VT,
- SDValue LHS,
- SDValue RHS,
- SDValue True,
- SDValue False,
- SDValue CC,
- SelectionDAG &DAG) const;
const char* getTargetNodeName(unsigned Opcode) const override;
@@ -216,7 +212,7 @@ public:
/// \brief Helper function that returns the byte offset of the given
/// type of implicit parameter.
- unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
+ uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
const ImplicitParameter Param) const;
};
@@ -267,7 +263,6 @@ enum NodeType : unsigned {
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
- BREV, // Reverse bits.
MUL_U24,
MUL_I24,
MAD_U24,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 15a3d54..a266e71 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -164,11 +164,6 @@ MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
// TODO: Implement this function
return nullptr;
}
-bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const {
- // TODO: Implement this function
- return false;
-}
bool
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad,
@@ -312,7 +307,9 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+ unsigned IgnoredFrameReg;
+ Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
+ MF, -1, IgnoredFrameReg);
return getIndirectIndexBegin(MF) + Offset;
}
@@ -367,3 +364,14 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
return MCOp;
}
+
+ArrayRef<std::pair<int, const char *>>
+AMDGPUInstrInfo::getSerializableTargetIndices() const {
+ static const std::pair<int, const char *> TargetIndices[] = {
+ {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
+ {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
+ return makeArrayRef(TargetIndices);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 86d3962..53e8b23 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -103,8 +103,6 @@ public:
/// read or write or -1 if indirect addressing is not used by this program.
int getIndirectIndexEnd(const MachineFunction &MF) const;
- bool canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr *> &NewMIs) const override;
@@ -147,6 +145,9 @@ public:
return get(pseudoToMCOpcode(Opcode));
}
+ ArrayRef<std::pair<int, const char *>>
+ getSerializableTargetIndices() const override;
+
//===---------------------------------------------------------------------===//
// Pure virtual functions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//
@@ -195,6 +196,7 @@ public:
};
namespace AMDGPU {
+ LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
} // End namespace AMDGPU
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index b413897..70e589c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -191,8 +191,6 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
-def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;
-
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
// performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 72cab39..11f6139 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -514,7 +514,7 @@ class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
SubRegIndex sub_reg>
: Pat<
- (sub_type (vector_extract vec_type:$src, sub_idx)),
+ (sub_type (extractelt vec_type:$src, sub_idx)),
(EXTRACT_SUBREG $src, sub_reg)
>;
@@ -522,7 +522,7 @@ class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
class Insert_Element <ValueType elem_type, ValueType vec_type,
int sub_idx, SubRegIndex sub_reg>
: Pat <
- (vector_insert vec_type:$vec, elem_type:$elem, sub_idx),
+ (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
(INSERT_SUBREG $vec, $elem, sub_reg)
>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index ab489cd..1de3546 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -69,8 +69,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
- def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
+ def int_AMDGPU_barrier_local : Intrinsic<[], [], [IntrConvergent]>;
+ def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>;
}
// Legacy names for compatibility.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 2083146..dfc652f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -61,7 +61,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_Register:
- MCOp = MCOperand::createReg(MO.getReg());
+ MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
break;
case MachineOperand::MO_MachineBasicBlock:
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
@@ -73,13 +73,6 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
break;
}
- case MachineOperand::MO_TargetIndex: {
- assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START);
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
- MCOp = MCOperand::createExpr(Expr);
- break;
- }
case MachineOperand::MO_ExternalSymbol: {
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
@@ -104,10 +97,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
#endif
if (MI->isBundle()) {
const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator I = MI;
- ++I;
- while (I != MBB->end() && I->isInsideBundle()) {
- EmitInstruction(I);
+ MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+ while (I != MBB->instr_end() && I->isInsideBundle()) {
+ EmitInstruction(&*I);
++I;
}
} else {
@@ -136,8 +128,6 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter();
InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups,
MF->getSubtarget<MCSubtargetInfo>());
- CodeStream.flush();
-
HexLines.resize(HexLines.size() + 1);
std::string &HexLine = HexLines.back();
raw_string_ostream HexStream(HexLine);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 21c7da6..5413717 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,11 +1,10 @@
#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;
-static const char *const ShaderTypeAttribute = "ShaderType";
-
// Pin the vtable to this file.
void AMDGPUMachineFunction::anchor() {}
@@ -13,13 +12,9 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
ShaderType(ShaderType::COMPUTE),
LDSSize(0),
+ ABIArgOffset(0),
ScratchSize(0),
IsKernel(true) {
- Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute);
- if (A.isStringAttribute()) {
- StringRef Str = A.getValueAsString();
- if (Str.getAsInteger(0, ShaderType))
- llvm_unreachable("Can't parse shader type!");
- }
+ ShaderType = AMDGPU::getShaderType(*MF.getFunction());
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index f5e4694..46fcee8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -37,6 +37,11 @@ public:
return ShaderType;
}
+ bool isKernel() const {
+ // FIXME: Assume everything is a kernel until function calls are supported.
+ return true;
+ }
+
unsigned ScratchSize;
bool IsKernel;
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp
new file mode 100644
index 0000000..554bf1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp
@@ -0,0 +1,373 @@
+//===-- AMDGPUOpenCLImageTypeLoweringPass.cpp -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass resolves calls to OpenCL image attribute, image resource ID and
+/// sampler resource ID getter functions.
+///
+/// Image attributes (size and format) are expected to be passed to the kernel
+/// as kernel arguments immediately following the image argument itself;
+/// therefore, this pass adds image size and format arguments to the kernel
+/// functions in the module. The kernel functions with image arguments are
+/// re-created using the new signature. The new arguments are added to the
+/// kernel metadata with kernel_arg_type set to "__llvm_image_size" or
+/// "__llvm_image_format".
+/// Note: this pass may invalidate pointers to functions.
+///
+/// Resource IDs of read-only images, write-only images and samplers are
+/// defined to be their index among the kernel arguments of the same
+/// type and access qualifier.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+
+StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size";
+StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format";
+StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id";
+StringRef GetSamplerResourceIDFunc = "llvm.OpenCL.sampler.get.resource.id";
+
+StringRef ImageSizeArgMDType = "__llvm_image_size";
+StringRef ImageFormatArgMDType = "__llvm_image_format";
+
+StringRef KernelsMDNodeName = "opencl.kernels";
+StringRef KernelArgMDNodeNames[] = {
+ "kernel_arg_addr_space",
+ "kernel_arg_access_qual",
+ "kernel_arg_type",
+ "kernel_arg_base_type",
+ "kernel_arg_type_qual"};
+const unsigned NumKernelArgMDNodes = 5;
+
+typedef SmallVector<Metadata *, 8> MDVector;
+struct KernelArgMD {
+ MDVector ArgVector[NumKernelArgMDNodes];
+};
+
+} // end anonymous namespace
+
+static inline bool
+IsImageType(StringRef TypeString) {
+ return TypeString == "image2d_t" || TypeString == "image3d_t";
+}
+
+static inline bool
+IsSamplerType(StringRef TypeString) {
+ return TypeString == "sampler_t";
+}
+
+static Function *
+GetFunctionFromMDNode(MDNode *Node) {
+ if (!Node)
+ return nullptr;
+
+ size_t NumOps = Node->getNumOperands();
+ if (NumOps != NumKernelArgMDNodes + 1)
+ return nullptr;
+
+ auto F = mdconst::dyn_extract<Function>(Node->getOperand(0));
+ if (!F)
+ return nullptr;
+
+ // Sanity checks.
+ size_t ExpectNumArgNodeOps = F->arg_size() + 1;
+ for (size_t i = 0; i < NumKernelArgMDNodes; ++i) {
+ MDNode *ArgNode = dyn_cast_or_null<MDNode>(Node->getOperand(i + 1));
+ if (ArgNode->getNumOperands() != ExpectNumArgNodeOps)
+ return nullptr;
+ if (!ArgNode->getOperand(0))
+ return nullptr;
+
+ // FIXME: It should be possible to do image lowering when some metadata
+ // args are missing or not in the expected order.
+ MDString *StringNode = dyn_cast<MDString>(ArgNode->getOperand(0));
+ if (!StringNode || StringNode->getString() != KernelArgMDNodeNames[i])
+ return nullptr;
+ }
+
+ return F;
+}
+
+static StringRef
+AccessQualFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+ MDNode *ArgAQNode = cast<MDNode>(KernelMDNode->getOperand(2));
+ return cast<MDString>(ArgAQNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static StringRef
+ArgTypeFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+ MDNode *ArgTypeNode = cast<MDNode>(KernelMDNode->getOperand(3));
+ return cast<MDString>(ArgTypeNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static MDVector
+GetArgMD(MDNode *KernelMDNode, unsigned OpIdx) {
+ MDVector Res;
+ for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+ MDNode *Node = cast<MDNode>(KernelMDNode->getOperand(i + 1));
+ Res.push_back(Node->getOperand(OpIdx));
+ }
+ return Res;
+}
+
+static void
+PushArgMD(KernelArgMD &MD, const MDVector &V) {
+ assert(V.size() == NumKernelArgMDNodes);
+ for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+ MD.ArgVector[i].push_back(V[i]);
+ }
+}
+
+namespace {
+
+class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
+ static char ID;
+
+ LLVMContext *Context;
+ Type *Int32Type;
+ Type *ImageSizeType;
+ Type *ImageFormatType;
+ SmallVector<Instruction *, 4> InstsToErase;
+
+ bool replaceImageUses(Argument &ImageArg, uint32_t ResourceID,
+ Argument &ImageSizeArg,
+ Argument &ImageFormatArg) {
+ bool Modified = false;
+
+ for (auto &Use : ImageArg.uses()) {
+ auto Inst = dyn_cast<CallInst>(Use.getUser());
+ if (!Inst) {
+ continue;
+ }
+
+ Function *F = Inst->getCalledFunction();
+ if (!F)
+ continue;
+
+ Value *Replacement = nullptr;
+ StringRef Name = F->getName();
+ if (Name.startswith(GetImageResourceIDFunc)) {
+ Replacement = ConstantInt::get(Int32Type, ResourceID);
+ } else if (Name.startswith(GetImageSizeFunc)) {
+ Replacement = &ImageSizeArg;
+ } else if (Name.startswith(GetImageFormatFunc)) {
+ Replacement = &ImageFormatArg;
+ } else {
+ continue;
+ }
+
+ Inst->replaceAllUsesWith(Replacement);
+ InstsToErase.push_back(Inst);
+ Modified = true;
+ }
+
+ return Modified;
+ }
+
+ bool replaceSamplerUses(Argument &SamplerArg, uint32_t ResourceID) {
+ bool Modified = false;
+
+ for (const auto &Use : SamplerArg.uses()) {
+ auto Inst = dyn_cast<CallInst>(Use.getUser());
+ if (!Inst) {
+ continue;
+ }
+
+ Function *F = Inst->getCalledFunction();
+ if (!F)
+ continue;
+
+ Value *Replacement = nullptr;
+ StringRef Name = F->getName();
+ if (Name == GetSamplerResourceIDFunc) {
+ Replacement = ConstantInt::get(Int32Type, ResourceID);
+ } else {
+ continue;
+ }
+
+ Inst->replaceAllUsesWith(Replacement);
+ InstsToErase.push_back(Inst);
+ Modified = true;
+ }
+
+ return Modified;
+ }
+
+ bool replaceImageAndSamplerUses(Function *F, MDNode *KernelMDNode) {
+ uint32_t NumReadOnlyImageArgs = 0;
+ uint32_t NumWriteOnlyImageArgs = 0;
+ uint32_t NumSamplerArgs = 0;
+
+ bool Modified = false;
+ InstsToErase.clear();
+ for (auto ArgI = F->arg_begin(); ArgI != F->arg_end(); ++ArgI) {
+ Argument &Arg = *ArgI;
+ StringRef Type = ArgTypeFromMD(KernelMDNode, Arg.getArgNo());
+
+ // Handle image types.
+ if (IsImageType(Type)) {
+ StringRef AccessQual = AccessQualFromMD(KernelMDNode, Arg.getArgNo());
+ uint32_t ResourceID;
+ if (AccessQual == "read_only") {
+ ResourceID = NumReadOnlyImageArgs++;
+ } else if (AccessQual == "write_only") {
+ ResourceID = NumWriteOnlyImageArgs++;
+ } else {
+ llvm_unreachable("Wrong image access qualifier.");
+ }
+
+ Argument &SizeArg = *(++ArgI);
+ Argument &FormatArg = *(++ArgI);
+ Modified |= replaceImageUses(Arg, ResourceID, SizeArg, FormatArg);
+
+ // Handle sampler type.
+ } else if (IsSamplerType(Type)) {
+ uint32_t ResourceID = NumSamplerArgs++;
+ Modified |= replaceSamplerUses(Arg, ResourceID);
+ }
+ }
+ for (unsigned i = 0; i < InstsToErase.size(); ++i) {
+ InstsToErase[i]->eraseFromParent();
+ }
+
+ return Modified;
+ }
+
+ std::tuple<Function *, MDNode *>
+ addImplicitArgs(Function *F, MDNode *KernelMDNode) {
+ bool Modified = false;
+
+ FunctionType *FT = F->getFunctionType();
+ SmallVector<Type *, 8> ArgTypes;
+
+ // Metadata operands for new MDNode.
+ KernelArgMD NewArgMDs;
+ PushArgMD(NewArgMDs, GetArgMD(KernelMDNode, 0));
+
+ // Add implicit arguments to the signature.
+ for (unsigned i = 0; i < FT->getNumParams(); ++i) {
+ ArgTypes.push_back(FT->getParamType(i));
+ MDVector ArgMD = GetArgMD(KernelMDNode, i + 1);
+ PushArgMD(NewArgMDs, ArgMD);
+
+ if (!IsImageType(ArgTypeFromMD(KernelMDNode, i)))
+ continue;
+
+ // Add size implicit argument.
+ ArgTypes.push_back(ImageSizeType);
+ ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageSizeArgMDType);
+ PushArgMD(NewArgMDs, ArgMD);
+
+ // Add format implicit argument.
+ ArgTypes.push_back(ImageFormatType);
+ ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageFormatArgMDType);
+ PushArgMD(NewArgMDs, ArgMD);
+
+ Modified = true;
+ }
+ if (!Modified) {
+ return std::make_tuple(nullptr, nullptr);
+ }
+
+ // Create function with new signature and clone the old body into it.
+ auto NewFT = FunctionType::get(FT->getReturnType(), ArgTypes, false);
+ auto NewF = Function::Create(NewFT, F->getLinkage(), F->getName());
+ ValueToValueMapTy VMap;
+ auto NewFArgIt = NewF->arg_begin();
+ for (auto &Arg: F->args()) {
+ auto ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ if (IsImageType(ArgTypeFromMD(KernelMDNode, Arg.getArgNo()))) {
+ (NewFArgIt++)->setName(Twine("__size_") + ArgName);
+ (NewFArgIt++)->setName(Twine("__format_") + ArgName);
+ }
+ }
+ SmallVector<ReturnInst*, 8> Returns;
+ CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns);
+
+ // Build new MDNode.
+ SmallVector<llvm::Metadata *, 6> KernelMDArgs;
+ KernelMDArgs.push_back(ConstantAsMetadata::get(NewF));
+ for (unsigned i = 0; i < NumKernelArgMDNodes; ++i)
+ KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i]));
+ MDNode *NewMDNode = MDNode::get(*Context, KernelMDArgs);
+
+ return std::make_tuple(NewF, NewMDNode);
+ }
+
+ bool transformKernels(Module &M) {
+ NamedMDNode *KernelsMDNode = M.getNamedMetadata(KernelsMDNodeName);
+ if (!KernelsMDNode)
+ return false;
+
+ bool Modified = false;
+ for (unsigned i = 0; i < KernelsMDNode->getNumOperands(); ++i) {
+ MDNode *KernelMDNode = KernelsMDNode->getOperand(i);
+ Function *F = GetFunctionFromMDNode(KernelMDNode);
+ if (!F)
+ continue;
+
+ Function *NewF;
+ MDNode *NewMDNode;
+ std::tie(NewF, NewMDNode) = addImplicitArgs(F, KernelMDNode);
+ if (NewF) {
+ // Replace old function and metadata with new ones.
+ F->eraseFromParent();
+ M.getFunctionList().push_back(NewF);
+ M.getOrInsertFunction(NewF->getName(), NewF->getFunctionType(),
+ NewF->getAttributes());
+ KernelsMDNode->setOperand(i, NewMDNode);
+
+ F = NewF;
+ KernelMDNode = NewMDNode;
+ Modified = true;
+ }
+
+ Modified |= replaceImageAndSamplerUses(F, KernelMDNode);
+ }
+
+ return Modified;
+ }
+
+ public:
+ AMDGPUOpenCLImageTypeLoweringPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override {
+ Context = &M.getContext();
+ Int32Type = Type::getInt32Ty(M.getContext());
+ ImageSizeType = ArrayType::get(Int32Type, 3);
+ ImageFormatType = ArrayType::get(Int32Type, 2);
+
+ return transformKernels(M);
+ }
+
+ const char *getPassName() const override {
+ return "AMDGPU OpenCL Image Type Pass";
+ }
+};
+
+char AMDGPUOpenCLImageTypeLoweringPass::ID = 0;
+
+} // end anonymous namespace
+
+ModulePass *llvm::createAMDGPUOpenCLImageTypeLoweringPass() {
+ return new AMDGPUOpenCLImageTypeLoweringPass();
+}
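For readers unfamiliar with the convention described in the file comment of this new pass, here is a standalone C++ sketch, not LLVM code; ArgDesc and assignResourceIDs are hypothetical names. It only illustrates the numbering scheme the comment describes and that replaceImageAndSamplerUses implements: read-only images, write-only images and samplers are each numbered by their position among the kernel arguments of the same type and access qualifier.

// Standalone sketch of the resource-ID numbering scheme (hypothetical names,
// not part of the pass or of LLVM).
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

struct ArgDesc {
  std::string Type;        // e.g. "image2d_t", "sampler_t", "int*"
  std::string AccessQual;  // "read_only", "write_only" or "none"
};

static std::vector<int32_t> assignResourceIDs(const std::vector<ArgDesc> &Args) {
  uint32_t NumRO = 0, NumWO = 0, NumSamplers = 0;
  std::vector<int32_t> IDs;
  for (const ArgDesc &A : Args) {
    if (A.Type == "image2d_t" || A.Type == "image3d_t")
      IDs.push_back(A.AccessQual == "read_only" ? NumRO++ : NumWO++);
    else if (A.Type == "sampler_t")
      IDs.push_back(NumSamplers++);
    else
      IDs.push_back(-1);  // not a resource argument
  }
  return IDs;
}

int main() {
  std::vector<ArgDesc> Args = {
      {"image2d_t", "read_only"}, {"sampler_t", "none"},
      {"image2d_t", "write_only"}, {"image3d_t", "read_only"}};
  for (int32_t ID : assignResourceIDs(Args))
    std::printf("%d ", (int)ID);   // prints: 0 0 0 1
  std::printf("\n");
  return 0;
}

For the argument list (read_only image2d_t, sampler_t, write_only image2d_t, read_only image3d_t) the sketch prints 0 0 0 1: the second read-only image gets ID 1 even though a write-only image sits between them, because read-only and write-only images are counted independently.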
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 57b7a73..87d50d5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -54,7 +54,7 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
- const FunctionType *FTy = F.getFunctionType();
+ FunctionType *FTy = F.getFunctionType();
LocalMemAvailable = ST.getLocalMemorySize();
@@ -63,7 +63,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// possible these arguments require the entire local memory space, so
// we cannot use local memory in the pass.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
- const Type *ParamTy = FTy->getParamType(i);
+ Type *ParamTy = FTy->getParamType(i);
if (ParamTy->isPointerTy() &&
ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
LocalMemAvailable = 0;
@@ -77,7 +77,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// Check how much local memory is being used by global objects
for (Module::global_iterator I = Mod->global_begin(),
E = Mod->global_end(); I != E; ++I) {
- GlobalVariable *GV = I;
+ GlobalVariable *GV = &*I;
PointerType *GVTy = GV->getType();
if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
continue;
@@ -101,7 +101,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
return false;
}
-static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
+static VectorType *arrayTypeToVecType(Type *ArrayTy) {
return VectorType::get(ArrayTy->getArrayElementType(),
ArrayTy->getArrayNumElements());
}
@@ -276,6 +276,9 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
}
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
+ if (!I.isStaticAlloca())
+ return;
+
IRBuilder<> Builder(&I);
// First try to replace the alloca with a vector
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index cfd800b..0344834 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -37,10 +37,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
assert(!"Unimplemented"); return BitVector();
}
- virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
- assert(!"Unimplemented"); return nullptr;
- }
-
virtual unsigned getHWRegIndex(unsigned Reg) const {
assert(!"Unimplemented"); return 0;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 5f32a65..44e0c47 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -16,6 +16,7 @@
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
+#include "SIFrameLowering.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -44,6 +45,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// disable it.
SmallString<256> FullFS("+promote-alloca,+fp64-denormals,");
+ if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
+ FullFS += "+flat-for-global,";
FullFS += FS;
if (GPU == "" && TT.getArch() == Triple::amdgcn)
@@ -67,26 +70,36 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
DumpCode(false), R600ALUInst(false), HasVertexCache(false),
TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false),
FP64Denormals(false), FP32Denormals(false), FastFMAF32(false),
- CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true),
- EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
- EnableUnsafeDSOffsetFolding(false),
+ CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false),
+ EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true),
+ EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
- FrameLowering(TargetFrameLowering::StackGrowsUp,
- 64 * 16, // Maximum stack alignment (long16)
- 0),
+ FrameLowering(nullptr),
InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
initializeSubtargetDependencies(TT, GPU, FS);
+ const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16)
+
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
InstrInfo.reset(new R600InstrInfo(*this));
TLInfo.reset(new R600TargetLowering(TM, *this));
+
+ // FIXME: Should have an R600-specific FrameLowering
+ FrameLowering.reset(new AMDGPUFrameLowering(
+ TargetFrameLowering::StackGrowsUp,
+ MaxStackAlign,
+ 0));
} else {
InstrInfo.reset(new SIInstrInfo(*this));
TLInfo.reset(new SITargetLowering(TM, *this));
+ FrameLowering.reset(new SIFrameLowering(
+ TargetFrameLowering::StackGrowsUp,
+ MaxStackAlign,
+ 0));
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 735f01d..9c7bb88 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
+//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
@@ -12,17 +12,15 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
-#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
+
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
-#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "AMDKernelCodeT.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -72,6 +70,7 @@ private:
bool FastFMAF32;
bool CaymanISA;
bool FlatAddressSpace;
+ bool FlatForGlobal;
bool EnableIRStructurizer;
bool EnablePromoteAlloca;
bool EnableIfCvt;
@@ -88,10 +87,10 @@ private:
bool CIInsts;
bool FeatureDisable;
int LDSBankCount;
- unsigned IsaVersion;
+ unsigned IsaVersion;
bool EnableHugeScratchBuffer;
- AMDGPUFrameLowering FrameLowering;
+ std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
InstrItineraryData InstrItins;
@@ -104,7 +103,7 @@ public:
StringRef GPU, StringRef FS);
const AMDGPUFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return FrameLowering.get();
}
const AMDGPUInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
@@ -161,6 +160,10 @@ public:
return FlatAddressSpace;
}
+ bool useFlatForGlobal() const {
+ return FlatForGlobal;
+ }
+
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
@@ -305,6 +308,9 @@ public:
return isAmdHsaOS() ? 0 : 36;
}
+ unsigned getMaxNumUserSGPRs() const {
+ return 16;
+ }
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 2297b52..22f85b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetMachine.h"
+#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
#include "AMDGPUTargetTransformInfo.h"
#include "R600ISelLowering.h"
@@ -41,6 +42,23 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
+
+ PassRegistry *PR = PassRegistry::getPassRegistry();
+ initializeSILowerI1CopiesPass(*PR);
+ initializeSIFixSGPRCopiesPass(*PR);
+ initializeSIFoldOperandsPass(*PR);
+ initializeSIFixSGPRLiveRangesPass(*PR);
+ initializeSIFixControlFlowLiveIntervalsPass(*PR);
+ initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
+ initializeAMDGPUAnnotateUniformValuesPass(*PR);
+}
+
+static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
+ if (TT.getOS() == Triple::AMDHSA)
+ return make_unique<AMDGPUHSATargetObjectFile>();
+
+ return make_unique<AMDGPUTargetObjectFile>();
}
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
@@ -72,15 +90,13 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
OptLevel),
- TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
+ TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this),
IntrinsicInfo() {
setRequiresStructuredCFG(true);
initAsmInfo();
}
-AMDGPUTargetMachine::~AMDGPUTargetMachine() {
- delete TLOF;
-}
+AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
@@ -110,7 +126,13 @@ namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+
+ // Exceptions and StackMaps are not supported, so these passes will never do
+ // anything.
+ disablePass(&StackMapLivenessID);
+ disablePass(&FuncletLayoutID);
+ }
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
return getTM<AMDGPUTargetMachine>();
@@ -126,8 +148,9 @@ public:
void addIRPasses() override;
void addCodeGenPrepare() override;
- virtual bool addPreISel() override;
- virtual bool addInstSelector() override;
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addGCPasses() override;
};
class R600PassConfig : public AMDGPUPassConfig {
@@ -147,6 +170,8 @@ public:
: AMDGPUPassConfig(TM, PM) { }
bool addPreISel() override;
bool addInstSelector() override;
+ void addFastRegAlloc(FunctionPass *RegAllocPass) override;
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addPreSched2() override;
@@ -156,7 +181,7 @@ public:
} // End of anonymous namespace
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(
AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
});
@@ -172,6 +197,10 @@ void AMDGPUPassConfig::addIRPasses() {
// functions, then we will generate code for the first function
// without ever running any passes on the second.
addPass(createBarrierNoopPass());
+
+ // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
+ addPass(createAMDGPUOpenCLImageTypeLoweringPass());
+
TargetPassConfig::addIRPasses();
}
@@ -198,6 +227,11 @@ bool AMDGPUPassConfig::addInstSelector() {
return false;
}
+bool AMDGPUPassConfig::addGCPasses() {
+ // Do nothing. GC is not supported.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//
@@ -238,16 +272,23 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
bool GCNPassConfig::addPreISel() {
AMDGPUPassConfig::addPreISel();
+
+ // FIXME: We need to run a pass to propagate the attributes when calls are
+ // supported.
+ addPass(&AMDGPUAnnotateKernelFeaturesID);
+
addPass(createSinkingPass());
addPass(createSITypeRewriter());
addPass(createSIAnnotateControlFlowPass());
+ addPass(createAMDGPUAnnotateUniformValues());
+
return false;
}
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
addPass(createSILowerI1CopiesPass());
- addPass(createSIFixSGPRCopiesPass(*TM));
+ addPass(&SIFixSGPRCopiesID);
addPass(createSIFoldOperandsPass());
return false;
}
@@ -259,7 +300,6 @@ void GCNPassConfig::addPreRegAlloc() {
// earlier passes might recompute live intervals.
// TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
if (getOptLevel() > CodeGenOpt::None) {
- initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
}
@@ -269,16 +309,27 @@ void GCNPassConfig::addPreRegAlloc() {
// This should be run after scheduling, but before register allocation. It
// also needs extra copies to the address operand to be eliminated.
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
insertPass(&MachineSchedulerID, &RegisterCoalescerID);
}
addPass(createSIShrinkInstructionsPass(), false);
- addPass(createSIFixSGPRLiveRangesPass(), false);
+}
+
+void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(&SIFixSGPRLiveRangesID);
+ TargetPassConfig::addFastRegAlloc(RegAllocPass);
+}
+
+void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ // We want to run this after LiveVariables is computed to avoid computing them
+ // twice.
+ // FIXME: We shouldn't disable the verifier here. r249087 introduced a failure
+ // that needs to be fixed.
+ insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false);
+ TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
void GCNPassConfig::addPostRegAlloc() {
- addPass(createSIPrepareScratchRegs(), false);
addPass(createSIShrinkInstructionsPass(), false);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 14792e3..236e3f8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -32,7 +32,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
private:
protected:
- TargetLoweringObjectFile *TLOF;
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUSubtarget Subtarget;
AMDGPUIntrinsicInfo IntrinsicInfo;
@@ -52,7 +52,7 @@ public:
TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF;
+ return TLOF.get();
}
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
new file mode 100644
index 0000000..e050f21
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -0,0 +1,87 @@
+//===-- AMDGPUTargetObjectFile.cpp - AMDGPU Object Files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetObjectFile.h"
+#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Generic Object File
+//===----------------------------------------------------------------------===//
+
+MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const {
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV))
+ return TextSection;
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+
+//===----------------------------------------------------------------------===//
+// HSA Object File
+//===----------------------------------------------------------------------===//
+
+
+void AMDGPUHSATargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM){
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+
+ TextSection = AMDGPU::getHSATextSection(Ctx);
+
+ DataGlobalAgentSection = AMDGPU::getHSADataGlobalAgentSection(Ctx);
+ DataGlobalProgramSection = AMDGPU::getHSADataGlobalProgramSection(Ctx);
+
+ RodataReadonlyAgentSection = AMDGPU::getHSARodataReadonlyAgentSection(Ctx);
+}
+
+bool AMDGPUHSATargetObjectFile::isAgentAllocationSection(
+ const char *SectionName) const {
+ return cast<MCSectionELF>(DataGlobalAgentSection)
+ ->getSectionName()
+ .equals(SectionName);
+}
+
+bool AMDGPUHSATargetObjectFile::isAgentAllocation(const GlobalValue *GV) const {
+ // Read-only segments can only have agent allocation.
+ return AMDGPU::isReadOnlySegment(GV) ||
+ (AMDGPU::isGlobalSegment(GV) && GV->hasSection() &&
+ isAgentAllocationSection(GV->getSection()));
+}
+
+bool AMDGPUHSATargetObjectFile::isProgramAllocation(
+ const GlobalValue *GV) const {
+ // The default for global segments is program allocation.
+ return AMDGPU::isGlobalSegment(GV) && !isAgentAllocation(GV);
+}
+
+MCSection *AMDGPUHSATargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const {
+ if (Kind.isText() && !GV->hasComdat())
+ return getTextSection();
+
+ if (AMDGPU::isGlobalSegment(GV)) {
+ if (isAgentAllocation(GV))
+ return DataGlobalAgentSection;
+
+ if (isProgramAllocation(GV))
+ return DataGlobalProgramSection;
+ }
+
+ return AMDGPUTargetObjectFile::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
new file mode 100644
index 0000000..921341e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -0,0 +1,51 @@
+//===-- AMDGPUTargetObjectFile.h - AMDGPU Object Info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the AMDGPU-specific subclass of
+/// TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF {
+ public:
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+};
+
+class AMDGPUHSATargetObjectFile final : public AMDGPUTargetObjectFile {
+private:
+ MCSection *DataGlobalAgentSection;
+ MCSection *DataGlobalProgramSection;
+ MCSection *RodataReadonlyAgentSection;
+
+ bool isAgentAllocationSection(const char *SectionName) const;
+ bool isAgentAllocation(const GlobalValue *GV) const;
+ bool isProgramAllocation(const GlobalValue *GV) const;
+
+public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 6dacc74..54a003d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -74,9 +74,109 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) {
return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
}
-unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; }
+unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
+ return Vector ? 0 : 32;
+}
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
// Semi-arbitrary large amount.
return 64;
}
+
+unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) {
+ // XXX - For some reason this isn't called for switch.
+ switch (Opcode) {
+ case Instruction::Br:
+ case Instruction::Ret:
+ return 10;
+ default:
+ return BaseT::getCFInstrCost(Opcode);
+ }
+}
+
+int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
+ switch (Opcode) {
+ case Instruction::ExtractElement:
+ // Dynamic indexing isn't free and is best avoided.
+ return Index == ~0u ? 2 : 0;
+ default:
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+}
+
+static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
+ const IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::not_intrinsic:
+ // This means we have an intrinsic that isn't defined in
+ // IntrinsicsAMDGPU.td
+ break;
+
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_mbcnt_hi:
+ case Intrinsic::amdgcn_mbcnt_lo:
+ case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::r600_read_tidig_z:
+ return true;
+ }
+
+ StringRef Name = I->getCalledFunction()->getName();
+ switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) {
+ default:
+ return false;
+ case AMDGPUIntrinsic::SI_tid:
+ case AMDGPUIntrinsic::SI_fs_interp:
+ return true;
+ }
+}
+
+static bool isArgPassedInSGPR(const Argument *A) {
+ const Function *F = A->getParent();
+ unsigned ShaderType = AMDGPU::getShaderType(*F);
+
+ // Arguments to compute shaders are never a source of divergence.
+ if (ShaderType == ShaderType::COMPUTE)
+ return true;
+
+ // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+ if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal))
+ return true;
+
+ // Everything else is in VGPRs.
+ return false;
+}
+
+///
+/// \returns true if the value could potentially produce different results
+/// across workitems in a wavefront.
+bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
+
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return !isArgPassedInSGPR(A);
+
+ // Loads from the private address space are divergent, because threads
+ // can execute the load instruction with the same inputs and get different
+ // results.
+ //
+ // All other loads are not divergent, because if threads issue loads with the
+ // same arguments, they will always get the same result.
+ if (const LoadInst *Load = dyn_cast<LoadInst>(V))
+ return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+ return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
+ }
+
+ // Assume all function calls are a source of divergence.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ return true;
+
+ return false;
+}
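As a concrete illustration of the comment in isSourceOfDivergence above, here is a small self-contained C++ simulation; it is not LLVM code and all names (loadPrivate, loadShared, PRIVATE_SLOT) are made up. Two lanes issue the same load with the same operand values: the results differ when the storage is per-lane (private/scratch) but are identical when the storage is shared, which is why only private-address-space loads are treated as divergent.

// Two-lane toy model of divergent vs. uniform loads (hypothetical names,
// not LLVM code).
#include <cstdio>

constexpr int NUM_LANES = 2;
constexpr int PRIVATE_SLOT = 0;      // same "address" in every lane's scratch

int SharedMem[4] = {42, 42, 42, 42}; // one copy, visible to all lanes
int PrivateMem[NUM_LANES][4];        // one copy per lane

int loadPrivate(int Lane, int Addr) { return PrivateMem[Lane][Addr]; }
int loadShared(int /*Lane*/, int Addr) { return SharedMem[Addr]; }

int main() {
  // Each lane initializes its own scratch with a lane-dependent value.
  for (int Lane = 0; Lane < NUM_LANES; ++Lane)
    PrivateMem[Lane][PRIVATE_SLOT] = 100 + Lane;

  // Same instruction, same operands, different results -> divergent.
  std::printf("private: lane0=%d lane1=%d\n",
              loadPrivate(0, PRIVATE_SLOT), loadPrivate(1, PRIVATE_SLOT));
  // Same instruction, same operands, same result -> uniform.
  std::printf("shared:  lane0=%d lane1=%d\n",
              loadShared(0, 2), loadShared(1, 2));
  return 0;
}

The first line prints 100 and 101 (divergent), the second prints 42 for both lanes (uniform), matching the classification the new hook returns.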
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index dee0a69..976afb0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -60,6 +60,11 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
+
+ unsigned getCFInstrCost(unsigned Opcode);
+
+ int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
+ bool isSourceOfDivergence(const Value *V) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index d918ac3..917efd1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -185,7 +185,7 @@ protected:
MachinePostDominatorTree *PDT;
MachineLoopInfo *MLI;
const R600InstrInfo *TII;
- const AMDGPURegisterInfo *TRI;
+ const R600RegisterInfo *TRI;
// PRINT FUNCTIONS
/// Print the ordered Blocks.
@@ -881,7 +881,7 @@ bool AMDGPUCFGStructurizer::run() {
} //while, "one iteration" over the function.
MachineBasicBlock *EntryMBB =
- GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
+ &*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
if (EntryMBB->succ_size() == 0) {
Finish = true;
DEBUG(
@@ -904,7 +904,7 @@ bool AMDGPUCFGStructurizer::run() {
} while (!Finish && MakeProgress);
// Misc wrap up to maintain the consistency of the Function representation.
- wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
+ wrapup(&*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
// Detach retired Block, release memory.
for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
@@ -1164,7 +1164,7 @@ int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
E = ContMBB.end(); It != E; ++It) {
- (*It)->removeSuccessor(LoopHeader);
+ (*It)->removeSuccessor(LoopHeader, true);
}
numLoopcontPatternMatch += NumCont;
@@ -1353,7 +1353,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// If MigrateTrue is true, then TrueBB is the block being "branched into"
// and if MigrateFalse is true, then FalseBB is the block being
// "branched into"
- //
+ //
// Here is the pseudo code for how I think the optimization should work:
// 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
// 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
@@ -1372,7 +1372,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
// the late machine optimization passes, however if we implement
// bool TargetRegisterInfo::requiresRegisterScavenging(
// const MachineFunction &MF)
- // and have it return true, liveness will be tracked correctly
+ // and have it return true, liveness will be tracked correctly
// by generic optimization passes. We will also need to make sure that
// all of our target-specific passes that run after regalloc and before
// the CFGStructurizer track liveness and we will need to modify this pass
@@ -1487,7 +1487,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
);
DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
- DstMBB->removeSuccessor(SrcMBB);
+ DstMBB->removeSuccessor(SrcMBB, true);
cloneSuccessorList(DstMBB, SrcMBB);
removeSuccessor(SrcMBB);
@@ -1537,9 +1537,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
if (TrueMBB) {
MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
- MBB->removeSuccessor(TrueMBB);
+ MBB->removeSuccessor(TrueMBB, true);
if (LandMBB && TrueMBB->succ_size()!=0)
- TrueMBB->removeSuccessor(LandMBB);
+ TrueMBB->removeSuccessor(LandMBB, true);
retireBlock(TrueMBB);
MLI->removeBlock(TrueMBB);
}
@@ -1548,9 +1548,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
insertInstrBefore(I, AMDGPU::ELSE);
MBB->splice(I, FalseMBB, FalseMBB->begin(),
FalseMBB->end());
- MBB->removeSuccessor(FalseMBB);
+ MBB->removeSuccessor(FalseMBB, true);
if (LandMBB && FalseMBB->succ_size() != 0)
- FalseMBB->removeSuccessor(LandMBB);
+ FalseMBB->removeSuccessor(LandMBB, true);
retireBlock(FalseMBB);
MLI->removeBlock(FalseMBB);
}
@@ -1570,8 +1570,7 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
- DstBlk->addSuccessor(LandMBB);
- DstBlk->removeSuccessor(DstBlk);
+ DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
@@ -1592,7 +1591,7 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
//now branchInst can be erase safely
BranchMI->eraseFromParent();
//now take care of successors, retire blocks
- ExitingMBB->removeSuccessor(LandMBB);
+ ExitingMBB->removeSuccessor(LandMBB, true);
}
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
@@ -1666,8 +1665,7 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
//srcBlk, oldBlk, newBlk
- PredMBB->removeSuccessor(MBB);
- PredMBB->addSuccessor(CloneMBB);
+ PredMBB->replaceSuccessor(MBB, CloneMBB);
// add all successor to cloneBlk
cloneSuccessorList(CloneMBB, MBB);
@@ -1695,10 +1693,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
);
SpliceEnd = SrcMBB->end();
} else {
- DEBUG(
- dbgs() << "migrateInstruction see branch instr\n" ;
- BranchMI->dump();
- );
+ DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
SpliceEnd = BranchMI;
}
DEBUG(
@@ -1711,7 +1706,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
DEBUG(
dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
- << "srcSize = " << SrcMBB->size() << "\n";
+ << "srcSize = " << SrcMBB->size() << '\n';
);
}
@@ -1743,7 +1738,7 @@ void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
// test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
while ((BranchMI = getLoopendBlockBranchInstr(MBB))
&& isUncondBranch(BranchMI)) {
- DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
+ DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
BranchMI->eraseFromParent();
}
}
@@ -1759,10 +1754,10 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
assert(BranchMI && isCondBranch(BranchMI));
- DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
+ DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
BranchMI->eraseFromParent();
SHOWNEWBLK(MBB1, "Removing redundant successor");
- MBB->removeSuccessor(MBB1);
+ MBB->removeSuccessor(MBB1, true);
}
void AMDGPUCFGStructurizer::addDummyExitBlock(
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2018983..d9f753f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -28,7 +28,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -83,6 +85,7 @@ public:
unsigned RegNo;
int Modifiers;
const MCRegisterInfo *TRI;
+ const MCSubtargetInfo *STI;
bool IsForcedVOP3;
};
@@ -102,7 +105,7 @@ public:
}
void addRegOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::createReg(getReg()));
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
}
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
@@ -215,6 +218,10 @@ public:
(isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
}
+ bool isSCSrc64() const {
+ return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
+ }
+
bool isVCSrc32() const {
return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
}
@@ -251,7 +258,22 @@ public:
return EndLoc;
}
- void print(raw_ostream &OS) const override { }
+ void print(raw_ostream &OS) const override {
+ switch (Kind) {
+ case Register:
+ OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>';
+ break;
+ case Immediate:
+ OS << getImm();
+ break;
+ case Token:
+ OS << '\'' << getToken() << '\'';
+ break;
+ case Expression:
+ OS << "<expr " << *Expr << '>';
+ break;
+ }
+ }
static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
enum ImmTy Type = ImmTyNone,
@@ -278,10 +300,12 @@ public:
static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
SMLoc E,
const MCRegisterInfo *TRI,
+ const MCSubtargetInfo *STI,
bool ForceVOP3) {
auto Op = llvm::make_unique<AMDGPUOperand>(Register);
Op->Reg.RegNo = RegNo;
Op->Reg.TRI = TRI;
+ Op->Reg.STI = STI;
Op->Reg.Modifiers = -1;
Op->Reg.IsForcedVOP3 = ForceVOP3;
Op->StartLoc = S;
@@ -301,14 +325,32 @@ public:
bool isDSOffset01() const;
bool isSWaitCnt() const;
bool isMubufOffset() const;
+ bool isSMRDOffset() const;
+ bool isSMRDLiteralOffset() const;
};
class AMDGPUAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
MCAsmParser &Parser;
unsigned ForcedEncodingSize;
+
+ bool isSI() const {
+ return AMDGPU::isSI(getSTI());
+ }
+
+ bool isCI() const {
+ return AMDGPU::isCI(getSTI());
+ }
+
+ bool isVI() const {
+ return AMDGPU::isVI(getSTI());
+ }
+
+ bool hasSGPR102_SGPR103() const {
+ return !isVI();
+ }
+
/// @name Auto-generated Match Functions
/// {
@@ -323,20 +365,34 @@ private:
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
+ bool ParseSectionDirectiveHSAText();
+ bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+ bool ParseDirectiveAMDGPUHsaKernel();
+ bool ParseDirectiveAMDGPUHsaModuleGlobal();
+ bool ParseDirectiveAMDGPUHsaProgramGlobal();
+ bool ParseSectionDirectiveHSADataGlobalAgent();
+ bool ParseSectionDirectiveHSADataGlobalProgram();
+ bool ParseSectionDirectiveHSARodataReadonlyAgent();
public:
- AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+public:
+ enum AMDGPUMatchResultTy {
+ Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
+ };
+
+ AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
- ForcedEncodingSize(0){
+ : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
+ ForcedEncodingSize(0) {
+ MCAsmParserExtension::Initialize(Parser);
- if (STI.getFeatureBits().none()) {
+ if (getSTI().getFeatureBits().none()) {
// Set default features.
- STI.ToggleFeature("SOUTHERN_ISLANDS");
+ copySTI().ToggleFeature("SOUTHERN_ISLANDS");
}
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
AMDGPUTargetStreamer &getTargetStreamer() {
@@ -420,10 +476,10 @@ struct OptionalOperand {
}
-static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+static int getRegClass(bool IsVgpr, unsigned RegWidth) {
if (IsVgpr) {
switch (RegWidth) {
- default: llvm_unreachable("Unknown register width");
+ default: return -1;
case 1: return AMDGPU::VGPR_32RegClassID;
case 2: return AMDGPU::VReg_64RegClassID;
case 3: return AMDGPU::VReg_96RegClassID;
@@ -434,7 +490,7 @@ static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
}
switch (RegWidth) {
- default: llvm_unreachable("Unknown register width");
+ default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
case 4: return AMDGPU::SReg_128RegClassID;
@@ -443,16 +499,16 @@ static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
}
}
-static unsigned getRegForName(const StringRef &RegName) {
+static unsigned getRegForName(StringRef RegName) {
return StringSwitch<unsigned>(RegName)
.Case("exec", AMDGPU::EXEC)
.Case("vcc", AMDGPU::VCC)
- .Case("flat_scr", AMDGPU::FLAT_SCR)
+ .Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("m0", AMDGPU::M0)
.Case("scc", AMDGPU::SCC)
- .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
- .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+ .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
+ .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
.Case("vcc_lo", AMDGPU::VCC_LO)
.Case("vcc_hi", AMDGPU::VCC_HI)
.Case("exec_lo", AMDGPU::EXEC_LO)
@@ -464,12 +520,14 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
const AsmToken Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- const StringRef &RegName = Tok.getString();
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+
+ StringRef RegName = Tok.getString();
RegNo = getRegForName(RegName);
if (RegNo) {
Parser.Lex();
- return false;
+ return !subtargetHasRegister(*TRI, RegNo);
}
// Match vgprs and sgprs
@@ -514,16 +572,24 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
RegIndexInClass = RegLo;
} else {
// SGPR registers are aligned. Max alignment is 4 dwords.
- RegIndexInClass = RegLo / std::min(RegWidth, 4u);
+ unsigned Size = std::min(RegWidth, 4u);
+ if (RegLo % Size != 0)
+ return true;
+
+ RegIndexInClass = RegLo / Size;
}
}
- const MCRegisterInfo *TRC = getContext().getRegisterInfo();
- unsigned RC = getRegClass(IsVgpr, RegWidth);
- if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs())
+ int RCID = getRegClass(IsVgpr, RegWidth);
+ if (RCID == -1)
return true;
- RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
- return false;
+
+ const MCRegisterClass RC = TRI->getRegClass(RCID);
+ if (RegIndexInClass >= RC.getNumRegs())
+ return true;
+
+ RegNo = RC.getRegister(RegIndexInClass);
+ return !subtargetHasRegister(*TRI, RegNo);
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
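The SGPR parsing hunk above enforces the alignment rule stated in its comment, namely that SGPR tuples are aligned to min(width, 4) dwords, before dividing RegLo by that alignment. The short standalone sketch below reproduces just that arithmetic so the accepted and rejected cases are easy to see; sgprTupleIndex is a made-up helper, not an LLVM API.

// Minimal sketch of the SGPR tuple alignment check (hypothetical helper).
#include <algorithm>
#include <cstdio>

static int sgprTupleIndex(unsigned RegLo, unsigned Width) {
  unsigned Align = std::min(Width, 4u);  // max alignment is 4 dwords
  if (RegLo % Align != 0)
    return -1;                           // misaligned, e.g. s[3:4]
  return static_cast<int>(RegLo / Align);
}

int main() {
  std::printf("s[4:7] -> %d\n", sgprTupleIndex(4, 4));  // index 1 in SReg_128
  std::printf("s[2:3] -> %d\n", sgprTupleIndex(2, 2));  // index 1 in SGPR_64
  std::printf("s[3:4] -> %d\n", sgprTupleIndex(3, 2));  // -1: rejected
  return 0;
}

Here s[4:7] maps to index 1 of the 128-bit class and s[2:3] to index 1 of the 64-bit class, while s[3:4] is rejected because 3 is not a multiple of the 2-dword alignment.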
@@ -534,6 +600,11 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
(getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)))
return Match_InvalidOperand;
+ if ((TSFlags & SIInstrFlags::VOP3) &&
+ (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
+ getForcedEncodingSize() != 64)
+ return Match_PreferE32;
+
return Match_Success;
}
@@ -549,7 +620,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
default: break;
case Match_Success:
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction not supported on this GPU");
@@ -592,6 +663,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
return Error(ErrorLoc, "invalid operand for instruction");
}
+ case Match_PreferE32:
+ return Error(IDLoc, "internal error: instruction without _e64 suffix "
+ "should be encoded as e32");
}
llvm_unreachable("Implement any new match types added!");
}
@@ -640,7 +714,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
Isa.Stepping,
"AMD", "AMDGPU");
@@ -852,7 +926,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits());
while (true) {
@@ -882,6 +956,64 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
return false;
}
+bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSATextSection(getContext()));
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected symbol name");
+
+ StringRef KernelName = Parser.getTok().getString();
+
+ getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
+ ELF::STT_AMDGPU_HSA_KERNEL);
+ Lex();
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected symbol name");
+
+ StringRef GlobalName = Parser.getTok().getIdentifier();
+
+ getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName);
+ Lex();
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected symbol name");
+
+ StringRef GlobalName = Parser.getTok().getIdentifier();
+
+ getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName);
+ Lex();
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSADataGlobalAgentSection(getContext()));
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSADataGlobalProgramSection(getContext()));
+ return false;
+}
+
+bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() {
+ getParser().getStreamer().SwitchSection(
+ AMDGPU::getHSARodataReadonlyAgentSection(getContext()));
+ return false;
+}
+
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@@ -894,6 +1026,55 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
+ if (IDVal == ".hsatext" || IDVal == ".text")
+ return ParseSectionDirectiveHSAText();
+
+ if (IDVal == ".amdgpu_hsa_kernel")
+ return ParseDirectiveAMDGPUHsaKernel();
+
+ if (IDVal == ".amdgpu_hsa_module_global")
+ return ParseDirectiveAMDGPUHsaModuleGlobal();
+
+ if (IDVal == ".amdgpu_hsa_program_global")
+ return ParseDirectiveAMDGPUHsaProgramGlobal();
+
+ if (IDVal == ".hsadata_global_agent")
+ return ParseSectionDirectiveHSADataGlobalAgent();
+
+ if (IDVal == ".hsadata_global_program")
+ return ParseSectionDirectiveHSADataGlobalProgram();
+
+ if (IDVal == ".hsarodata_readonly_agent")
+ return ParseSectionDirectiveHSARodataReadonlyAgent();
+
+ return true;
+}
+
+bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
+ unsigned RegNo) const {
+ if (isCI())
+ return true;
+
+ if (isSI()) {
+ // No flat_scr
+ switch (RegNo) {
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::FLAT_SCR_LO:
+ case AMDGPU::FLAT_SCR_HI:
+ return false;
+ default:
+ return true;
+ }
+ }
+
+ // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
+ // SI/CI have.
+ for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
+ R.isValid(); ++R) {
+ if (*R == RegNo)
+ return false;
+ }
+
return true;
}
@@ -943,13 +1124,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- APInt IntVal32(32, IntVal);
- if (IntVal32.getSExtValue() != IntVal) {
+ if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) {
Error(S, "invalid immediate: only 32-bit values are legal");
return MatchOperand_ParseFail;
}
- IntVal = IntVal32.getSExtValue();
if (Negate)
IntVal *= -1;
Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
@@ -1002,7 +1181,7 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
Operands.push_back(AMDGPUOperand::CreateReg(
- RegNo, S, E, getContext().getRegisterInfo(),
+ RegNo, S, E, getContext().getRegisterInfo(), &getSTI(),
isForcedVOP3()));
if (HasModifiers || Modifiers) {
@@ -1571,6 +1750,23 @@ AMDGPUAsmParser::parseR128(OperandVector &Operands) {
}
//===----------------------------------------------------------------------===//
+// smrd
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isSMRDOffset() const {
+
+ // FIXME: Support 20-bit offsets on VI. We need to pass subtarget
+ // information here.
+ return isImm() && isUInt<8>(getImm());
+}
+
+bool AMDGPUOperand::isSMRDLiteralOffset() const {
+ // 32-bit literals are only supported on CI and we only want to use them
+ // when the offset is larger than 8 bits.
+ return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
+}
+
+//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//
@@ -1653,8 +1849,12 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
}
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
- ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
- unsigned i = 2;
+
+ unsigned i = 1;
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ if (Desc.getNumDefs() > 0) {
+ ((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1);
+ }
std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
diff --git a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
index 2f5fdbe..88a090d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
@@ -8,6 +8,22 @@
//===----------------------------------------------------------------------===//
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
+// Remaining instructions:
+// S_CBRANCH_CDBGUSER
+// S_CBRANCH_CDBGSYS
+// S_CBRANCH_CDBGSYS_OR_USER
+// S_CBRANCH_CDBGSYS_AND_USER
+// DS_NOP
+// DS_GWS_SEMA_RELEASE_ALL
+// DS_WRAP_RTN_B32
+// DS_CNDXCHG32_RTN_B64
+// DS_WRITE_B96
+// DS_WRITE_B128
+// DS_CONDXCHG32_RTN_B128
+// DS_READ_B96
+// DS_READ_B128
+// BUFFER_LOAD_DWORDX3
+// BUFFER_STORE_DWORDX3
def isCIVI : Predicate <
@@ -23,6 +39,7 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
let SubtargetPredicate = isCIVI in {
+let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
VOP_F64_F64, ftrunc
>;
@@ -35,82 +52,218 @@ defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
VOP_F64_F64, frint
>;
+} // End SchedRW = [WriteDoubleAdd]
+
+let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
VOP_F32_F32
>;
defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
VOP_F32_F32
>;
+} // End SchedRW = [WriteQuarterRate32]
+
+//===----------------------------------------------------------------------===//
+// VOP3 Instructions
+//===----------------------------------------------------------------------===//
+
+defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
+ VOP_I32_I32_I32
+>;
+defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
+ VOP_I32_I32_I32
+>;
+
+let isCommutable = 1 in {
+defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
+ VOP_I64_I32_I32_I64
+>;
+
+// XXX - Does this set VCC?
+defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
+ VOP_I64_I32_I32_I64
+>;
+} // End isCommutable = 1
+
+
+//===----------------------------------------------------------------------===//
+// DS Instructions
+//===----------------------------------------------------------------------===//
+defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
+
+// DS_CONDXCHG32_RTN_B64
+// DS_CONDXCHG32_RTN_B128
+
+//===----------------------------------------------------------------------===//
+// SMRD Instructions
+//===----------------------------------------------------------------------===//
+
+defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>,
+ "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF Instructions
+//===----------------------------------------------------------------------===//
+
+defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>,
+ "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol
+>;
//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//
-def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x8, "flat_load_ubyte", VGPR_32>;
-def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x9, "flat_load_sbyte", VGPR_32>;
-def FLAT_LOAD_USHORT : FLAT_Load_Helper <0xa, "flat_load_ushort", VGPR_32>;
-def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0xb, "flat_load_sshort", VGPR_32>;
-def FLAT_LOAD_DWORD : FLAT_Load_Helper <0xc, "flat_load_dword", VGPR_32>;
-def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0xd, "flat_load_dwordx2", VReg_64>;
-def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0xe, "flat_load_dwordx4", VReg_128>;
-def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0xf, "flat_load_dwordx3", VReg_96>;
-def FLAT_STORE_BYTE : FLAT_Store_Helper <0x18, "flat_store_byte", VGPR_32>;
-def FLAT_STORE_SHORT : FLAT_Store_Helper <0x1a, "flat_store_short", VGPR_32>;
-def FLAT_STORE_DWORD : FLAT_Store_Helper <0x1c, "flat_store_dword", VGPR_32>;
-def FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
- 0x1d, "flat_store_dwordx2", VReg_64
+defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
+ flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32
+>;
+defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
+ flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32
+>;
+defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
+ flat<0xa, 0x12>, "flat_load_ushort", VGPR_32
+>;
+defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
+  flat<0xb, 0x13>, "flat_load_sshort", VGPR_32
+>;
+defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
+ flat<0xc, 0x14>, "flat_load_dword", VGPR_32
+>;
+defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
+ flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64
+>;
+defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
+ flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128
+>;
+defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
+ flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96
+>;
+defm FLAT_STORE_BYTE : FLAT_Store_Helper <
+ flat<0x18>, "flat_store_byte", VGPR_32
+>;
+defm FLAT_STORE_SHORT : FLAT_Store_Helper <
+  flat<0x1a>, "flat_store_short", VGPR_32
+>;
+defm FLAT_STORE_DWORD : FLAT_Store_Helper <
+ flat<0x1c>, "flat_store_dword", VGPR_32
+>;
+defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
+ flat<0x1d>, "flat_store_dwordx2", VReg_64
+>;
+defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
+ flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128
>;
-def FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
- 0x1e, "flat_store_dwordx4", VReg_128
+defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
+ flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
>;
-def FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
- 0x1f, "flat_store_dwordx3", VReg_96
+defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
+ flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32
>;
-defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <0x30, "flat_atomic_swap", VGPR_32>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
- 0x31, "flat_atomic_cmpswap", VGPR_32, VReg_64
->;
-defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <0x32, "flat_atomic_add", VGPR_32>;
-defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <0x33, "flat_atomic_sub", VGPR_32>;
-defm FLAT_ATOMIC_RSUB : FLAT_ATOMIC <0x34, "flat_atomic_rsub", VGPR_32>;
-defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <0x35, "flat_atomic_smin", VGPR_32>;
-defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <0x36, "flat_atomic_umin", VGPR_32>;
-defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <0x37, "flat_atomic_smax", VGPR_32>;
-defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <0x38, "flat_atomic_umax", VGPR_32>;
-defm FLAT_ATOMIC_AND : FLAT_ATOMIC <0x39, "flat_atomic_and", VGPR_32>;
-defm FLAT_ATOMIC_OR : FLAT_ATOMIC <0x3a, "flat_atomic_or", VGPR_32>;
-defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <0x3b, "flat_atomic_xor", VGPR_32>;
-defm FLAT_ATOMIC_INC : FLAT_ATOMIC <0x3c, "flat_atomic_inc", VGPR_32>;
-defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <0x3d, "flat_atomic_dec", VGPR_32>;
-defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
- 0x3e, "flat_atomic_fcmpswap", VGPR_32, VReg_64
+ flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, VReg_64
+>;
+defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
+ flat<0x32, 0x42>, "flat_atomic_add", VGPR_32
+>;
+defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
+ flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32
+>;
+defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
+ flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32
+>;
+defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
+ flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32
+>;
+defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
+ flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32
+>;
+defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
+ flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32
+>;
+defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
+ flat<0x39, 0x48>, "flat_atomic_and", VGPR_32
+>;
+defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
+ flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32
+>;
+defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
+ flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32
+>;
+defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
+ flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32
+>;
+defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
+ flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32
+>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
+ flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64
>;
-defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <0x3f, "flat_atomic_fmin", VGPR_32>;
-defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <0x40, "flat_atomic_fmax", VGPR_32>;
-defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <0x50, "flat_atomic_swap_x2", VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
- 0x51, "flat_atomic_cmpswap_x2", VReg_64, VReg_128
->;
-defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <0x52, "flat_atomic_add_x2", VReg_64>;
-defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <0x53, "flat_atomic_sub_x2", VReg_64>;
-defm FLAT_ATOMIC_RSUB_X2 : FLAT_ATOMIC <0x54, "flat_atomic_rsub_x2", VReg_64>;
-defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <0x55, "flat_atomic_smin_x2", VReg_64>;
-defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <0x56, "flat_atomic_umin_x2", VReg_64>;
-defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <0x57, "flat_atomic_smax_x2", VReg_64>;
-defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <0x58, "flat_atomic_umax_x2", VReg_64>;
-defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <0x59, "flat_atomic_and_x2", VReg_64>;
-defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <0x5a, "flat_atomic_or_x2", VReg_64>;
-defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <0x5b, "flat_atomic_xor_x2", VReg_64>;
-defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <0x5c, "flat_atomic_inc_x2", VReg_64>;
-defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <0x5d, "flat_atomic_dec_x2", VReg_64>;
-defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
- 0x5e, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128
+ flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, VReg_128
+>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
+ flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64
+>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
+ flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64
+>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
+ flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64
+>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
+ flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64
+>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
+ flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64
+>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
+ flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64
+>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
+ flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64
+>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
+ flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64
+>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
+ flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64
+>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
+ flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64
+>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
+ flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64
>;
-defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <0x5f, "flat_atomic_fmin_x2", VReg_64>;
-defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <0x60, "flat_atomic_fmax_x2", VReg_64>;
} // End SubtargetPredicate = isCIVI
+// CI Only flat instructions
+
+let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst in {
+
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
+ flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, VReg_64
+>;
+defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
+ flat<0x3f>, "flat_atomic_fmin", VGPR_32
+>;
+defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
+ flat<0x40>, "flat_atomic_fmax", VGPR_32
+>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
+ flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128
+>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
+ flat<0x5f>, "flat_atomic_fmin_x2", VReg_64
+>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
+ flat<0x60>, "flat_atomic_fmax_x2", VReg_64
+>;
+
+} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
+
//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//
@@ -147,3 +300,80 @@ def : FLATStore_Pattern <FLAT_STORE_DWORDX4, v4i32, flat_store>;
} // End HasFlatAddressSpace predicate
+let Predicates = [isCI] in {
+
+// Convert (x - floor(x)) to fract(x)
+def : Pat <
+ (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
+ (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
+ (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+// Convert (x + (-floor(x))) to fract(x)
+def : Pat <
+ (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
+ (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
+ (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+} // End Predicates = [isCI]
+
+
+//===----------------------------------------------------------------------===//
+// Patterns to generate flat for global
+//===----------------------------------------------------------------------===//
+
+def useFlatForGlobal : Predicate <
+ "Subtarget->useFlatForGlobal() || "
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">;
+
+let Predicates = [useFlatForGlobal] in {
+
+// 1. Offset as 20bit DWORD immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM20bit:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
+>;
+
+// Patterns for global loads with no offset
+class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr)),
+ (inst $addr, 0, 0, 0)
+>;
+
+def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
+
+class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (node vt:$data, i64:$addr),
+ (inst $data, $addr, 0, 0, 0)
+>;
+
+def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
+def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
+
+class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr, vt:$data)),
+ (inst $addr, $data, 0, 0)
+>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+
+} // End Predicates = [useFlatForGlobal]
diff --git a/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index ba4df82..a6c3785 100644
--- a/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -82,6 +82,10 @@ def RAT_STORE_DWORD32 : RAT_STORE_DWORD <R600_TReg32_X, i32, 0x1>;
def RAT_STORE_DWORD64 : RAT_STORE_DWORD <R600_Reg64, v2i32, 0x3>;
def RAT_STORE_DWORD128 : RAT_STORE_DWORD <R600_Reg128, v4i32, 0xf>;
+def RAT_STORE_TYPED_cm: CF_MEM_RAT_STORE_TYPED<0> {
+ let eop = 0; // This bit is not used on Cayman.
+}
+
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
: VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
diff --git a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 7adcd46..779a14e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -40,6 +40,15 @@ class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
: EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
"MEM_RAT "#name, pattern>;
+class CF_MEM_RAT_STORE_TYPED<bits<1> has_eop>
+ : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
+ i32imm:$rat_id, InstFlag:$eop),
+ "STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr"
+ #!if(has_eop, ", $eop", ""),
+ [(int_r600_rat_store_typed R600_Reg128:$rw_gpr,
+ R600_Reg128:$index_gpr,
+ (i32 imm:$rat_id))]>;
+
def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
(ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
"MSKOR $rw_gpr.XW, $index_gpr",
@@ -105,6 +114,8 @@ def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
>;
+def RAT_STORE_TYPED_eg: CF_MEM_RAT_STORE_TYPED<1>;
+
} // End usesCustomInserter = 1
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index e811d5c..a187de8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -283,8 +284,13 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
O << "4.0";
else if (Imm == DoubleToBits(-4.0))
O << "-4.0";
- else
- llvm_unreachable("64-bit literal constants not supported");
+ else {
+ assert(isUInt<32>(Imm));
+
+ // In rare situations, we will have a 32-bit literal in a 64-bit
+ // operand. This is technically allowed for the encoding of s_mov_b64.
+ O << formatHex(static_cast<uint64_t>(Imm));
+ }
}
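The fallback added here relies on the property that a 64-bit operand which is not one of the named inline constants can only be encoded as a 32-bit literal (as the s_mov_b64 remark notes). A standalone sketch of that policy, with a hypothetical helper in place of formatHex/raw_ostream:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Anything other than the named inline constants must fit in 32 bits.
    static void printNonInline64(uint64_t Imm) {
      assert(Imm <= UINT32_MAX && "64-bit literal constants not supported");
      std::printf("0x%llx\n", static_cast<unsigned long long>(Imm));
    }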
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -592,11 +598,11 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
} else {
unsigned Stream = (SImm16 >> 8) & 0x3;
if (Op == 1)
- O << "cut";
+ O << "cut";
else if (Op == 2)
- O << "emit";
+ O << "emit";
else if (Op == 3)
- O << "emit-cut";
+ O << "emit-cut";
O << " stream " << Stream;
}
O << "), [m0] ";
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 14fb511..90541d8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -13,9 +13,7 @@
#ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
#define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 4434d9b..60e8c8f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -99,14 +99,22 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
case AMDGPU::fixup_si_rodata: {
uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- *Dst = Value;
- break;
- }
-
- case AMDGPU::fixup_si_end_of_text: {
- uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
- // The value points to the last instruction in the text section, so we
- // need to add 4 bytes to get to the start of the constants.
+ // We emit constant data at the end of the text section and generate its
+ // address using the following code sequence:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol
+ // s_addc_u32 s1, s1, 0
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // the fixup replaces $symbol with a literal constant, which is a
+ // pc-relative offset from the encoding of the $symbol operand to the
+ // constant data.
+ //
+ // What we want here is an offset from the start of the s_add_u32
+ // instruction to the constant data, but since the encoding of $symbol
+ // starts 4 bytes after the start of the add instruction, we end up
+ // with an offset that is 4 bytes too small. This requires us to
+ // add 4 to the fixup value before applying it.
*Dst = Value + 4;
break;
}
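The adjustment described in the comment reduces to simple address arithmetic: the fixup site is the $symbol literal, 4 bytes past the start of s_add_u32, so the value actually written must be the raw pc-relative distance plus 4. A small worked sketch with hypothetical addresses (not the MC fixup machinery):

    #include <cstdint>

    // AddStart:      start of s_add_u32 (the address s_getpc_b64 produces).
    // ConstantData:  start of the constant data at the end of .text.
    // The raw pc-relative fixup value is measured from the $symbol literal,
    // which sits 4 bytes into the s_add_u32 encoding.
    static uint64_t rodataFixupValue(uint64_t AddStart, uint64_t ConstantData) {
      uint64_t FixupSite = AddStart + 4;          // where $symbol is encoded
      uint64_t Value = ConstantData - FixupSite;  // what the assembler computes
      return Value + 4;                           // == ConstantData - AddStart
    }

With AddStart = 0x100 and ConstantData = 0x140, the raw value is 0x3c and the applied value is 0x40, exactly the distance s_getpc_b64 needs added to reach the data.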
@@ -136,8 +144,7 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_si_rodata", 0, 32, 0 },
- { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
+ { "fixup_si_rodata", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
};
if (Kind < FirstTargetFixupKind)
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
new file mode 100644
index 0000000..9ff9fe7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
@@ -0,0 +1,26 @@
+//===-------- AMDGPUELFStreamer.cpp - ELF Object Output -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUELFStreamer.h"
+#include "Utils/AMDGPUBaseInfo.h"
+
+using namespace llvm;
+
+void AMDGPUELFStreamer::InitSections(bool NoExecStack) {
+ // Start with the .hsatext section by default.
+ SwitchSection(AMDGPU::getHSATextSection(getContext()));
+}
+
+MCELFStreamer *llvm::createAMDGPUELFStreamer(MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return new AMDGPUELFStreamer(Context, MAB, OS, Emitter);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
new file mode 100644
index 0000000..488d7e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
@@ -0,0 +1,40 @@
+//===-------- AMDGPUELFStreamer.h - ELF Object Output ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a custom MCELFStreamer which allows us to insert some hooks before
+// emitting data into an actual object file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H
+
+#include "llvm/MC/MCELFStreamer.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCSubtargetInfo;
+
+class AMDGPUELFStreamer : public MCELFStreamer {
+public:
+ AMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter)
+ : MCELFStreamer(Context, MAB, OS, Emitter) { }
+
+  virtual void InitSections(bool NoExecStack) override;
+};
+
+MCELFStreamer *createAMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
+} // namespace llvm.
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
index 01021d6..59a9178 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -21,9 +21,6 @@ enum Fixups {
/// fixup for global addresses with constant initializers
fixup_si_rodata,
- /// fixup for offset from instruction to end of text section
- fixup_si_end_of_text,
-
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 028a86d..68b1d1a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -22,13 +22,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
InlineAsmEnd = ";#ASMEND";
//===--- Data Emission Directives -------------------------------------===//
- ZeroDirective = ".zero";
- AsciiDirective = ".ascii\t";
- AscizDirective = ".asciz\t";
- Data8bitsDirective = ".byte\t";
- Data16bitsDirective = ".short\t";
- Data32bitsDirective = ".long\t";
- Data64bitsDirective = ".quad\t";
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
@@ -41,3 +34,10 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
//===--- Dwarf Emission Directives -----------------------------------===//
SupportsDebugInformation = true;
}
+
+bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const {
+ return SectionName == ".hsatext" || SectionName == ".hsadata_global_agent" ||
+ SectionName == ".hsadata_global_program" ||
+ SectionName == ".hsarodata_readonly_agent" ||
+ MCAsmInfo::shouldOmitSectionDirective(SectionName);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
index a5bac51..a546961 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -21,12 +21,13 @@ class Triple;
// If you need to create another MCAsmInfo class, which inherits from MCAsmInfo,
// you will need to make sure your new class sets PrivateGlobalPrefix to
-// a prefix that won't appeary in a fuction name. The default value
+// a prefix that won't appear in a function name. The default value
// for PrivateGlobalPrefix is 'L', so it will consider any function starting
// with 'L' as a local symbol.
class AMDGPUMCAsmInfo : public MCAsmInfoELF {
public:
explicit AMDGPUMCAsmInfo(const Triple &TT);
+ bool shouldOmitSectionDirective(StringRef SectionName) const override;
};
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index c709741..f704094 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
+#include "AMDGPUELFStreamer.h"
#include "AMDGPUMCAsmInfo.h"
#include "AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
@@ -85,6 +86,15 @@ static MCTargetStreamer * createAMDGPUObjectTargetStreamer(
return new AMDGPUTargetELFStreamer(S);
}
+static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ if (T.getOS() == Triple::AMDHSA)
+ return createAMDGPUELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+
+ return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+}
+
extern "C" void LLVMInitializeAMDGPUTargetMC() {
for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
@@ -95,6 +105,7 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend);
+ TargetRegistry::RegisterELFStreamer(*T, createMCStreamer);
}
// R600 specific registration
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 09e6cb1..b91134d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -13,6 +13,7 @@
#include "AMDGPUTargetStreamer.h"
#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
@@ -220,6 +221,26 @@ AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
}
+void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
+ unsigned Type) {
+ switch (Type) {
+ default: llvm_unreachable("Invalid AMDGPU symbol type");
+ case ELF::STT_AMDGPU_HSA_KERNEL:
+ OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
+ break;
+ }
+}
+
+void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal(
+ StringRef GlobalName) {
+ OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n';
+}
+
+void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
+ StringRef GlobalName) {
+ OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
@@ -291,7 +312,35 @@ AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
MCStreamer &OS = getStreamer();
OS.PushSection();
- OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
+ // The MCObjectFileInfo that is available to the assembler is a generic
+ // implementation and not AMDGPUHSATargetObjectFile, so we can't use
+ // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection.
+ OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
OS.PopSection();
}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
+ unsigned Type) {
+ MCSymbolELF *Symbol = cast<MCSymbolELF>(
+ getStreamer().getContext().getOrCreateSymbol(SymbolName));
+ Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
+ StringRef GlobalName) {
+
+ MCSymbolELF *Symbol = cast<MCSymbolELF>(
+ getStreamer().getContext().getOrCreateSymbol(GlobalName));
+ Symbol->setType(ELF::STT_OBJECT);
+ Symbol->setBinding(ELF::STB_LOCAL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
+ StringRef GlobalName) {
+
+ MCSymbolELF *Symbol = cast<MCSymbolELF>(
+ getStreamer().getContext().getOrCreateSymbol(GlobalName));
+ Symbol->setType(ELF::STT_OBJECT);
+ Symbol->setBinding(ELF::STB_GLOBAL);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index d37677c..83bb728 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -7,6 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+
#include "AMDKernelCodeT.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -27,6 +30,12 @@ public:
StringRef ArchName) = 0;
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
+
+ virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
+
+ virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
+
+ virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
};
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@@ -41,6 +50,12 @@ public:
StringRef ArchName) override;
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
+ void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+ void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
};
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
@@ -72,6 +87,12 @@ public:
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+ void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+ void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+ void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
};
}
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index e683498..3c1142d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -37,7 +37,6 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
const MCRegisterInfo &MRI;
public:
-
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
: MCII(mcii), MRI(mri) { }
@@ -50,8 +49,8 @@ public:
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
-private:
+private:
void EmitByte(unsigned int byte, raw_ostream &OS) const;
void Emit(uint32_t value, raw_ostream &OS) const;
@@ -59,7 +58,6 @@ private:
unsigned getHWRegChan(unsigned reg) const;
unsigned getHWReg(unsigned regNo) const;
-
};
} // End anonymous namespace
@@ -83,7 +81,7 @@ enum FCInstr {
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
- MCContext &Ctx) {
+ MCContext &Ctx) {
return new R600MCCodeEmitter(MCII, MRI);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 65a0eeb..9eb3dad 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -36,7 +36,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
void operator=(const SIMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
- MCContext &Ctx;
/// \brief Can this operand also contain immediate values?
bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -47,7 +46,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
MCContext &ctx)
- : MCII(mcii), MRI(mri), Ctx(ctx) { }
+ : MCII(mcii), MRI(mri) { }
~SIMCCodeEmitter() override {}
@@ -250,17 +249,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
if (MO.isExpr()) {
const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
- MCFixupKind Kind;
- const MCSymbol *Sym =
- Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
-
- if (&Expr->getSymbol() == Sym) {
- // Add the offset to the beginning of the constant values.
- Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
- } else {
- // This is used for constant data stored in .rodata.
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
- }
+ MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/Processors.td b/contrib/llvm/lib/Target/AMDGPU/Processors.td
index d9a0723..a1584a2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Processors.td
+++ b/contrib/llvm/lib/Target/AMDGPU/Processors.td
@@ -142,3 +142,7 @@ def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
def : ProcessorModel<"fiji", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureISAVersion8_0_1]
>;
+
+def : ProcessorModel<"stoney", SIQuarterSpeedModel,
+ [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index c8f37f6..bd80bb2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -405,8 +405,8 @@ private:
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
- getLiteral(BI, Literals);
- ClauseContent.push_back(BI);
+ getLiteral(&*BI, Literals);
+ ClauseContent.push_back(&*BI);
}
I = BI;
DeleteMI->eraseFromParent();
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 4e4d554..124a9c6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -190,6 +190,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM,
setSchedulingPreference(Sched::Source);
}
+static inline bool isEOP(MachineBasicBlock::iterator I) {
+ return std::next(I)->getOpcode() == AMDGPU::RETURN;
+}
+
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineFunction * MF = BB->getParent();
@@ -276,12 +280,18 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
- unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
-
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
- .addImm(EOP); // Set End of program bit
+ .addImm(isEOP(I)); // Set End of program bit
+ break;
+ }
+ case AMDGPU::RAT_STORE_TYPED_eg: {
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addOperand(MI->getOperand(2))
+ .addImm(isEOP(I)); // Set End of program bit
break;
}
@@ -539,7 +549,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
}
}
}
- bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+ bool EOP = isEOP(I);
if (!EOP && !isLastInstructionOfItsType)
return BB;
unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
@@ -946,6 +956,8 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
SDLoc DL(Op);
+
+ // TODO: Should this propagate fast-math-flags?
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
DAG.getNode(ISD::FADD, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Arg,
@@ -1936,6 +1948,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
Arg->getOperand(0).getOperand(Element));
}
}
+ break;
}
case ISD::SELECT_CC: {
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 855fa9f..8b6eea1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -922,7 +922,7 @@ bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const{
+ BranchProbability Probability) const{
return true;
}
@@ -933,14 +933,14 @@ R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
MachineBasicBlock &FMBB,
unsigned NumFCycles,
unsigned ExtraFCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
return true;
}
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCyles,
- const BranchProbability &Probability)
+ BranchProbability Probability)
const {
return true;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index dee4c2b..e7251c3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -174,18 +174,18 @@ namespace llvm {
bool
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override ;
+ BranchProbability Probability) const override ;
bool
isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles, unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const override;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
index 7beed09..33ef6a4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1655,7 +1655,7 @@ def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>;
// ISel Patterns
//===----------------------------------------------------------------------===//
-// CND*_INT Pattterns for f32 True / False values
+// CND*_INT Patterns for f32 True / False values
class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat <
(selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc),
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 0c06ccc..5efb3b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -318,7 +318,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
MRI = &(Fn.getRegInfo());
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
- MachineBasicBlock *MB = MBB;
+ MachineBasicBlock *MB = &*MBB;
PreviousRegSeq.clear();
PreviousRegSeqByReg.clear();
PreviousRegSeqByUndefCount.clear();
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index deee5bc..2126961 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -81,11 +81,11 @@ private:
int LastDstChan = -1;
do {
bool isTrans = false;
- int BISlot = getSlot(BI);
+ int BISlot = getSlot(&*BI);
if (LastDstChan >= BISlot)
isTrans = true;
LastDstChan = BISlot;
- if (TII->isPredicated(BI))
+ if (TII->isPredicated(&*BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
@@ -95,7 +95,7 @@ private:
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
- if (isTrans || TII->isTransOnly(BI)) {
+ if (isTrans || TII->isTransOnly(&*BI)) {
Result[Dst] = AMDGPU::PS;
continue;
}
@@ -149,7 +149,7 @@ private:
public:
// Ctor.
R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI)
- : VLIWPacketizerList(MF, MLI, true),
+ : VLIWPacketizerList(MF, MLI, nullptr),
TII(static_cast<const R600InstrInfo *>(
MF.getSubtarget().getInstrInfo())),
TRI(TII->getRegisterInfo()) {
@@ -162,14 +162,14 @@ public:
}
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- bool ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) override {
+ bool ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock *MBB) override {
return false;
}
// isSoloInstruction - return true if instruction MI can not be packetized
// with any other instruction, which means that MI itself is a packet.
- bool isSoloInstruction(MachineInstr *MI) override {
+ bool isSoloInstruction(const MachineInstr *MI) override {
if (TII->isVector(*MI))
return true;
if (!TII->isALUInstr(MI->getOpcode()))
@@ -375,7 +375,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
// instruction stream until we find the nearest boundary.
MachineBasicBlock::iterator I = RegionEnd;
for(;I != MBB->begin(); --I, --RemainingCount) {
- if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
+ if (TII->isSchedulingBoundary(&*std::prev(I), &*MBB, Fn))
break;
}
I = MBB->begin();
@@ -392,7 +392,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
continue;
}
- Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd);
RegionEnd = I;
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
index 9713e60..4f8a129 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -35,7 +35,7 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
- const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
+ const TargetRegisterClass *getCFGStructurizerRegClass(MVT VT) const;
const RegClassWeight &
getRegClassWeight(const TargetRegisterClass *RC) const override;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index ccfbf1b..fa4d24a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -312,11 +312,10 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
Preds.push_back(*PI);
}
- BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT,
- LI, false);
+ BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
}
- CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt());
+ CallInst::Create(EndCf, popSaved(), "", &*BB->getFirstInsertionPt());
}
/// \brief Annotate the control flow with intrinsics so the backend can
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
index 4c32639..7f79dd3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -37,7 +37,8 @@ enum {
MIMG = 1 << 18,
FLAT = 1 << 19,
WQM = 1 << 20,
- VGPRSpill = 1 << 21
+ VGPRSpill = 1 << 21,
+ VOPAsmPrefer32Bit = 1 << 22
};
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
index 5fe8d19..636750d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
@@ -16,15 +16,9 @@
#include "AMDGPU.h"
#include "SIInstrInfo.h"
-#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 23502b4..96e37c5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -82,22 +82,10 @@ using namespace llvm;
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
-
-private:
+public:
static char ID;
- const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const;
- const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const;
- bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI) const;
-public:
- SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
+ SIFixSGPRCopies() : MachineFunctionPass(ID) { }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -105,14 +93,23 @@ public:
return "SI Fix SGPR copies";
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
+INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
+ "SI Fix SGPR copies", false, false)
+
char SIFixSGPRCopies::ID = 0;
-FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) {
- return new SIFixSGPRCopies(tm);
+char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;
+
+FunctionPass *llvm::createSIFixSGPRCopiesPass() {
+ return new SIFixSGPRCopies();
}
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
@@ -128,77 +125,115 @@ static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
return false;
}
-/// This functions walks the use list of Reg until it finds an Instruction
-/// that isn't a COPY returns the register class of that instruction.
-/// \return The register defined by the first non-COPY instruction.
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses(
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const {
-
- const TargetRegisterClass *RC
- = TargetRegisterInfo::isVirtualRegister(Reg) ?
- MRI.getRegClass(Reg) :
- TRI->getPhysRegClass(Reg);
-
- RC = TRI->getSubRegClass(RC, SubReg);
- for (MachineRegisterInfo::use_instr_iterator
- I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) {
- switch (I->getOpcode()) {
- case AMDGPU::COPY:
- RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI,
- I->getOperand(0).getReg(),
- I->getOperand(0).getSubReg()));
- break;
- }
- }
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getCopyRegClasses(const MachineInstr &Copy,
+ const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI) {
+ unsigned DstReg = Copy.getOperand(0).getReg();
+ unsigned SrcReg = Copy.getOperand(1).getReg();
+
+ const TargetRegisterClass *SrcRC =
+ TargetRegisterInfo::isVirtualRegister(SrcReg) ?
+ MRI.getRegClass(SrcReg) :
+ TRI.getPhysRegClass(SrcReg);
- return RC;
+ // We don't really care about the subregister here.
+ // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
+
+ const TargetRegisterClass *DstRC =
+ TargetRegisterInfo::isVirtualRegister(DstReg) ?
+ MRI.getRegClass(DstReg) :
+ TRI.getPhysRegClass(DstReg);
+
+ return std::make_pair(SrcRC, DstRC);
}
-const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef(
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI,
- unsigned Reg,
- unsigned SubReg) const {
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
- return TRI->getSubRegClass(RC, SubReg);
- }
- MachineInstr *Def = MRI.getVRegDef(Reg);
- if (Def->getOpcode() != AMDGPU::COPY) {
- return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg);
- }
+static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const SIRegisterInfo &TRI) {
+ return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
+}
- return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(),
- Def->getOperand(1).getSubReg());
+static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const SIRegisterInfo &TRI) {
+ return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}
-bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
- const SIRegisterInfo *TRI,
- const MachineRegisterInfo &MRI) const {
+// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
+//
+// SGPRx = ...
+// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+// VGPRz = COPY SGPRy
+//
+// ==>
+//
+// VGPRx = COPY SGPRx
+// VGPRz = REG_SEQUENCE VGPRx, sub0
+//
+// This exposes immediate folding opportunities when materializing 64-bit
+// immediates.
+static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI) {
+ assert(MI.isRegSequence());
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
+ return false;
- unsigned DstReg = Copy.getOperand(0).getReg();
- unsigned SrcReg = Copy.getOperand(1).getReg();
- unsigned SrcSubReg = Copy.getOperand(1).getSubReg();
+ if (!MRI.hasOneUse(DstReg))
+ return false;
- if (!TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // If the destination register is a physical register there isn't really
- // much we can do to fix this.
+ MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
+ if (!CopyUse.isCopy())
return false;
- }
- const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
- const TargetRegisterClass *SrcRC;
+ if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
+ return false;
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
+ // TODO: Could have multiple extracts?
+ unsigned SubReg = CopyUse.getOperand(1).getSubReg();
+ if (SubReg != AMDGPU::NoSubRegister)
return false;
- SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
- return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+ MRI.setRegClass(DstReg, DstRC);
+
+ // SGPRx = ...
+ // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+ // VGPRz = COPY SGPRy
+
+ // =>
+ // VGPRx = COPY SGPRx
+ // VGPRz = REG_SEQUENCE VGPRx, sub0
+
+ MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+
+ for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
+ unsigned SrcReg = MI.getOperand(I).getReg();
+    unsigned SrcSubReg = MI.getOperand(I).getSubReg();
+
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ assert(TRI->isSGPRClass(SrcRC) &&
+ "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
+
+ SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
+ const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
+
+ unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
+
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
+ .addOperand(MI.getOperand(I));
+
+ MI.getOperand(I).setReg(TmpReg);
+ }
+
+ CopyUse.eraseFromParent();
+ return true;
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
@@ -207,40 +242,38 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ SmallVector<MachineInstr *, 16> Worklist;
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
+ I != E; ++I) {
MachineInstr &MI = *I;
- if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
- DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
- DEBUG(MI.print(dbgs()));
- TII->moveToVALU(MI);
-
- }
switch (MI.getOpcode()) {
- default: continue;
- case AMDGPU::PHI: {
- DEBUG(dbgs() << "Fixing PHI: " << MI);
-
- for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
- const MachineOperand &Op = MI.getOperand(i);
- unsigned Reg = Op.getReg();
- const TargetRegisterClass *RC
- = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg());
+ default:
+ continue;
+ case AMDGPU::COPY: {
+ // If the destination register is a physical register there isn't really
+ // much we can do to fix this.
+ if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
+ continue;
- MRI.constrainRegClass(Op.getReg(), RC);
- }
- unsigned Reg = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg,
- MI.getOperand(0).getSubReg());
- if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) {
- MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass);
+ const TargetRegisterClass *SrcRC, *DstRC;
+ std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
+ if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
+ DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
+ TII->moveToVALU(MI);
}
+ break;
+ }
+ case AMDGPU::PHI: {
+ DEBUG(dbgs() << "Fixing PHI: " << MI);
+ unsigned Reg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;
@@ -310,8 +343,10 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}
case AMDGPU::REG_SEQUENCE: {
if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
- !hasVGPROperands(MI, TRI))
+ !hasVGPROperands(MI, TRI)) {
+ foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
continue;
+ }
DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
index 0c54446..8bda283 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp
@@ -7,9 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file
-/// SALU instructions ignore control flow, so we need to modify the live ranges
-/// of the registers they define in some cases.
+/// \file SALU instructions ignore the execution mask, so we need to modify the
+/// live ranges of the registers they define in some cases.
///
/// The main case we need to handle is when a def is used in one side of a
/// branch and not another. For example:
@@ -42,13 +41,15 @@
/// ENDIF
/// %use
///
-/// Adding this use will make the def live thoughout the IF branch, which is
+/// Adding this use will make the def live throughout the IF branch, which is
/// what we want.
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachinePostDominators.h"
@@ -79,9 +80,13 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LiveIntervals>();
+ AU.addRequired<LiveVariables>();
+ AU.addPreserved<LiveVariables>();
+
AU.addRequired<MachinePostDominatorTree>();
+ AU.addPreserved<MachinePostDominatorTree>();
AU.setPreservesCFG();
+
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -90,7 +95,7 @@ public:
INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
"SI Fix SGPR Live Ranges", false, false)
@@ -108,40 +113,48 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
- LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
- MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
- std::vector<std::pair<unsigned, LiveRange *>> SGPRLiveRanges;
+ bool MadeChange = false;
+
+ MachinePostDominatorTree *PDT = &getAnalysis<MachinePostDominatorTree>();
+ SmallVector<unsigned, 16> SGPRLiveRanges;
+
+ LiveVariables *LV = &getAnalysis<LiveVariables>();
+ MachineBasicBlock *Entry = &MF.front();
- // First pass, collect all live intervals for SGPRs
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
+ // Use a depth-first order so that in SSA, we encounter all defs before
+ // uses. Once the defs of the block have been found, attempt to insert
+ // SGPR_USE instructions in successor blocks if required.
+ for (MachineBasicBlock *MBB : depth_first(Entry)) {
+ for (const MachineInstr &MI : *MBB) {
for (const MachineOperand &MO : MI.defs()) {
- if (MO.isImplicit())
- continue;
+ // We should never see a live out def of a physical register, so we also
+ // do not need to worry about implicit_defs().
unsigned Def = MO.getReg();
if (TargetRegisterInfo::isVirtualRegister(Def)) {
- if (TRI->isSGPRClass(MRI.getRegClass(Def)))
- SGPRLiveRanges.push_back(
- std::make_pair(Def, &LIS->getInterval(Def)));
- } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) {
- SGPRLiveRanges.push_back(
- std::make_pair(Def, &LIS->getRegUnit(Def)));
+ if (TRI->isSGPRClass(MRI.getRegClass(Def))) {
+ // Only consider defs that are live outs. We don't care about def /
+ // use within the same block.
+
+ // LiveVariables does not consider registers that are only used in a
+ // phi in a successor block as live out, unlike LiveIntervals.
+ //
+ // This is OK because SIFixSGPRCopies replaced any SGPR phis with
+ // VGPRs.
+ if (LV->isLiveOut(Def, *MBB))
+ SGPRLiveRanges.push_back(Def);
+ }
}
}
}
- }
- // Second pass fix the intervals
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
- MachineBasicBlock &MBB = *BI;
- if (MBB.succ_size() < 2)
+ if (MBB->succ_size() < 2)
continue;
- // We have structured control flow, so number of succesors should be two.
- assert(MBB.succ_size() == 2);
- MachineBasicBlock *SuccA = *MBB.succ_begin();
- MachineBasicBlock *SuccB = *(++MBB.succ_begin());
+ // We have structured control flow, so the number of successors should be
+ // two.
+ assert(MBB->succ_size() == 2);
+ MachineBasicBlock *SuccA = *MBB->succ_begin();
+ MachineBasicBlock *SuccB = *(++MBB->succ_begin());
MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB);
if (!NCD)
@@ -156,37 +169,51 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(),
*(++NCD->succ_begin()));
}
- assert(SuccA && SuccB);
- for (std::pair<unsigned, LiveRange*> RegLR : SGPRLiveRanges) {
- unsigned Reg = RegLR.first;
- LiveRange *LR = RegLR.second;
-
- // FIXME: We could be smarter here. If the register is Live-In to
- // one block, but the other doesn't have any SGPR defs, then there
- // won't be a conflict. Also, if the branch decision is based on
- // a value in an SGPR, then there will be no conflict.
- bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA);
- bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB);
-
- if ((!LiveInToA && !LiveInToB) ||
- (LiveInToA && LiveInToB))
+
+ for (unsigned Reg : SGPRLiveRanges) {
+ // FIXME: We could be smarter here. If the register is Live-In to one
+ // block, but the other doesn't have any SGPR defs, then there won't be a
+ // conflict. Also, if the branch condition is uniform then there will be
+ // no conflict.
+ bool LiveInToA = LV->isLiveIn(Reg, *SuccA);
+ bool LiveInToB = LV->isLiveIn(Reg, *SuccB);
+
+ if (!LiveInToA && !LiveInToB) {
+ DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
+ << " is live into neither successor\n");
continue;
+ }
+
+ if (LiveInToA && LiveInToB) {
+ DEBUG(dbgs() << PrintReg(Reg, TRI, 0)
+ << " is live into both successors\n");
+ continue;
+ }
// This interval is live in to one successor, but not the other, so
// we need to update its range so it is live in to both.
- DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR <<
- " BB#" << SuccA->getNumber() << ", BB#" <<
- SuccB->getNumber() <<
- " with NCD = " << NCD->getNumber() << '\n');
+ DEBUG(dbgs() << "Possible SGPR conflict detected for "
+ << PrintReg(Reg, TRI, 0)
+ << " BB#" << SuccA->getNumber()
+ << ", BB#" << SuccB->getNumber()
+ << " with NCD = BB#" << NCD->getNumber() << '\n');
+
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "Not expecting to extend live range of physreg");
// FIXME: Need to figure out how to update LiveRange here so this pass
// will be able to preserve LiveInterval analysis.
- BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
- TII->get(AMDGPU::SGPR_USE))
- .addReg(Reg, RegState::Implicit);
- DEBUG(NCD->getFirstNonPHI()->dump());
+ MachineInstr *NCDSGPRUse =
+ BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(),
+ TII->get(AMDGPU::SGPR_USE))
+ .addReg(Reg, RegState::Implicit);
+
+ MadeChange = true;
+ LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse);
+
+ DEBUG(NCDSGPRUse->dump());
}
}
- return false;
+ return MadeChange;
}
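(Aside on the SIFixSGPRLiveRanges change above: for each SGPR that is live out of a block with a divergent branch, the pass checks whether it is live into exactly one of the two successors; only then does it insert an implicit SGPR_USE at the nearest common post-dominator. A minimal standalone sketch of that per-register decision, with made-up register numbers and live-in sets.)

```cpp
// If a value is live into exactly one successor of a divergent branch, an
// artificial use in the post-dominator keeps the register alive on both paths.
#include <cstdio>
#include <set>

int main() {
  std::set<int> LiveInToA = {1, 3};
  std::set<int> LiveInToB = {2, 3};
  int SGPRLiveOuts[] = {1, 2, 3};

  for (int Reg : SGPRLiveOuts) {
    bool InA = LiveInToA.count(Reg) != 0;
    bool InB = LiveInToB.count(Reg) != 0;
    if (InA == InB) {
      std::printf("%%%d: live into neither or both successors, nothing to do\n", Reg);
      continue;
    }
    // Real pass: BuildMI(*NCD, ..., SGPR_USE).addReg(Reg, RegState::Implicit)
    std::printf("%%%d: insert implicit SGPR_USE in the post-dominator block\n", Reg);
  }
}
```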
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index c288725..02a3930 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -45,6 +45,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -164,8 +165,8 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
- unsigned CommuteIdx0;
- unsigned CommuteIdx1;
+ unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
+ unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
if (CanCommute) {
@@ -175,7 +176,16 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
OpNo = CommuteIdx0;
}
- if (!CanCommute || !TII->commuteInstruction(MI))
+ // One of the operands might be an immediate operand, and OpNo may refer to
+ // it after the call to commuteInstruction() below. Such situations are
+ // avoided here explicitly, as OpNo must be a register operand to be a
+ // candidate for memory folding.
+ if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
+ !MI->getOperand(CommuteIdx1).isReg()))
+ return false;
+
+ if (!CanCommute ||
+ !TII->commuteInstruction(MI, false, CommuteIdx0, CommuteIdx1))
return false;
if (!TII->isOperandLegal(MI, OpNo, OpToFold))
@@ -186,6 +196,110 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
return true;
}
+static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
+ unsigned UseOpIdx,
+ std::vector<FoldCandidate> &FoldList,
+ SmallVectorImpl<MachineInstr *> &CopiesToReplace,
+ const SIInstrInfo *TII, const SIRegisterInfo &TRI,
+ MachineRegisterInfo &MRI) {
+ const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
+
+ // FIXME: Fold operands with subregs.
+ if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
+ UseOp.isImplicit())) {
+ return;
+ }
+
+ bool FoldingImm = OpToFold.isImm();
+ APInt Imm;
+
+ if (FoldingImm) {
+ unsigned UseReg = UseOp.getReg();
+ const TargetRegisterClass *UseRC
+ = TargetRegisterInfo::isVirtualRegister(UseReg) ?
+ MRI.getRegClass(UseReg) :
+ TRI.getPhysRegClass(UseReg);
+
+ Imm = APInt(64, OpToFold.getImm());
+
+ const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
+ const TargetRegisterClass *FoldRC =
+ TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+
+ // Split 64-bit constants into 32-bits for folding.
+ if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
+ if (UseRC->getSize() != 8)
+ return;
+
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1);
+ Imm = Imm.getHiBits(32);
+ }
+ }
+
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+ if (UseMI->getOpcode() == AMDGPU::COPY) {
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getPhysRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
+ if (MovOp == AMDGPU::COPY)
+ return;
+
+ UseMI->setDesc(TII->get(MovOp));
+ CopiesToReplace.push_back(UseMI);
+ }
+ }
+
+ // Special case for REG_SEQUENCE: We can't fold literals into
+ // REG_SEQUENCE instructions, so we have to fold them into the
+ // uses of REG_SEQUENCE.
+ if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+ unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
+
+ for (MachineRegisterInfo::use_iterator
+ RSUse = MRI.use_begin(RegSeqDstReg),
+ RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+
+ MachineInstr *RSUseMI = RSUse->getParent();
+ if (RSUse->getSubReg() != RegSeqDstSubReg)
+ continue;
+
+ foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
+ CopiesToReplace, TII, TRI, MRI);
+ }
+ return;
+ }
+
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[UseOpIdx].RegClass == -1)
+ return;
+
+ if (FoldingImm) {
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+ return;
+ }
+
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+ // to enable more folding opportunities. The shrink operands pass
+ // already does this.
+ return;
+}
+
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
@@ -226,88 +340,36 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
OpToFold.getSubReg()))
continue;
+
+ // We need to mutate the operands of new mov instructions to add implicit
+ // uses of EXEC, but adding them invalidates the use_iterator, so defer
+ // this.
+ SmallVector<MachineInstr *, 4> CopiesToReplace;
+
std::vector<FoldCandidate> FoldList;
for (MachineRegisterInfo::use_iterator
Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
Use != E; ++Use) {
MachineInstr *UseMI = Use->getParent();
- const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());
- // FIXME: Fold operands with subregs.
- if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
- UseOp.isImplicit())) {
- continue;
- }
-
- APInt Imm;
-
- if (FoldingImm) {
- unsigned UseReg = UseOp.getReg();
- const TargetRegisterClass *UseRC
- = TargetRegisterInfo::isVirtualRegister(UseReg) ?
- MRI.getRegClass(UseReg) :
- TRI.getPhysRegClass(UseReg);
-
- Imm = APInt(64, OpToFold.getImm());
-
- // Split 64-bit constants into 32-bits for folding.
- if (UseOp.getSubReg()) {
- if (UseRC->getSize() != 8)
- continue;
-
- if (UseOp.getSubReg() == AMDGPU::sub0) {
- Imm = Imm.getLoBits(32);
- } else {
- assert(UseOp.getSubReg() == AMDGPU::sub1);
- Imm = Imm.getHiBits(32);
- }
- }
-
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
- if (UseMI->getOpcode() == AMDGPU::COPY) {
- unsigned DestReg = UseMI->getOperand(0).getReg();
- const TargetRegisterClass *DestRC
- = TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI.getRegClass(DestReg) :
- TRI.getPhysRegClass(DestReg);
-
- unsigned MovOp = TII->getMovOpcode(DestRC);
- if (MovOp == AMDGPU::COPY)
- continue;
-
- UseMI->setDesc(TII->get(MovOp));
- }
- }
-
- const MCInstrDesc &UseDesc = UseMI->getDesc();
-
- // Don't fold into target independent nodes. Target independent opcodes
- // don't have defined register classes.
- if (UseDesc.isVariadic() ||
- UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
- continue;
-
- if (FoldingImm) {
- MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
- continue;
- }
-
- tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
-
- // FIXME: We could try to change the instruction from 64-bit to 32-bit
- // to enable more folding opportunites. The shrink operands pass
- // already does this.
+ foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
+ CopiesToReplace, TII, TRI, MRI);
}
+ // Make sure we add EXEC uses to any new v_mov instructions created.
+ for (MachineInstr *Copy : CopiesToReplace)
+ Copy->addImplicitDefUseOperands(MF);
+
for (FoldCandidate &Fold : FoldList) {
if (updateOperand(Fold, TRI)) {
// Clear kill flags.
if (!Fold.isImm()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
- Fold.OpToFold->setIsKill(false);
+ // FIXME: Probably shouldn't bother trying to fold if not an
+ // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
+ // copies.
+ MRI.clearKillFlags(Fold.OpToFold->getReg());
}
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
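(Aside on the foldOperand change above: when a 64-bit constant feeds a use through sub0 or sub1, only the matching 32-bit half is folded, via APInt::getLoBits/getHiBits. A standalone sketch of that split, using plain integers in place of APInt.)

```cpp
// sub0 uses take the low 32 bits of a 64-bit immediate, sub1 uses take the
// high 32 bits, mirroring the splitting rule in foldOperand().
#include <cstdint>
#include <cstdio>

enum SubReg { Sub0, Sub1 };

static uint32_t foldedImmFor(uint64_t Imm, SubReg SR) {
  return SR == Sub0 ? static_cast<uint32_t>(Imm)         // Imm.getLoBits(32)
                    : static_cast<uint32_t>(Imm >> 32);  // Imm.getHiBits(32)
}

int main() {
  uint64_t Imm = 0x123456789abcdef0ULL;
  std::printf("sub0 gets 0x%08x\n", foldedImmFor(Imm, Sub0));
  std::printf("sub1 gets 0x%08x\n", foldedImmFor(Imm, Sub1));
}
```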
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
new file mode 100644
index 0000000..6b3c81c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -0,0 +1,243 @@
+//===----------------------- SIFrameLowering.cpp --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#include "SIFrameLowering.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+
+using namespace llvm;
+
+
+static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo,
+ const MachineFrameInfo *FrameInfo) {
+ if (!FuncInfo->hasSpilledSGPRs())
+ return false;
+
+ if (FuncInfo->hasSpilledVGPRs())
+ return false;
+
+ for (int I = FrameInfo->getObjectIndexBegin(),
+ E = FrameInfo->getObjectIndexEnd(); I != E; ++I) {
+ if (!FrameInfo->isSpillSlotObjectIndex(I))
+ return false;
+ }
+
+ return true;
+}
+
+static ArrayRef<MCPhysReg> getAllSGPR128() {
+ return makeArrayRef(AMDGPU::SReg_128RegClass.begin(),
+ AMDGPU::SReg_128RegClass.getNumRegs());
+}
+
+static ArrayRef<MCPhysReg> getAllSGPRs() {
+ return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
+ AMDGPU::SGPR_32RegClass.getNumRegs());
+}
+
+void SIFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ if (!MF.getFrameInfo()->hasStackObjects())
+ return;
+
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // If we only have SGPR spills, we won't actually be using scratch memory
+ // since these spill to VGPRs.
+ //
+ // FIXME: We should be cleaning up these unused SGPR spill frame indices
+ // somewhere.
+ if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
+ return;
+
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+
+ // We need to insert initialization of the scratch resource descriptor.
+ unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
+ assert(ScratchRsrcReg != AMDGPU::NoRegister);
+
+ unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ assert(ScratchWaveOffsetReg != AMDGPU::NoRegister);
+
+ unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+
+ unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
+ if (ST.isAmdHsaOS()) {
+ PreloadedPrivateBufferReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ }
+
+ // If we reserved the original input registers, we don't need to copy to the
+ // reserved registers.
+ if (ScratchRsrcReg == PreloadedPrivateBufferReg) {
+ // We should always reserve these 5 registers at the same time.
+ assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg &&
+ "scratch wave offset and private segment buffer inconsistent");
+ return;
+ }
+
+
+ // We added live-ins during argument lowering, but since they were not used
+ // they were deleted. We're adding the uses now, so add them back.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+ MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
+
+ if (ST.isAmdHsaOS()) {
+ MRI.addLiveIn(PreloadedPrivateBufferReg);
+ MBB.addLiveIn(PreloadedPrivateBufferReg);
+ }
+
+ // We reserved the last registers for this. Shift it down to the end of those
+ // which were actually used.
+ //
+ // FIXME: It might be safer to use a pseudoregister before replacement.
+
+ // FIXME: We should be able to eliminate unused input registers. We cannot,
+ // however, do this for the resources required for scratch access. For now we
+ // skip over user SGPRs and may leave unused holes.
+
+ // We find the resource first because it has an alignment requirement.
+ if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
+ // Skip the last 2 elements because the last one is reserved for VCC, and
+ // this is the 2nd to last element already.
+ for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
+ // Pick the first unallocated one. Make sure we don't clobber the other
+ // reserved input we needed.
+ if (!MRI.isPhysRegUsed(Reg)) {
+ assert(MRI.isAllocatable(Reg));
+ MRI.replaceRegWith(ScratchRsrcReg, Reg);
+ ScratchRsrcReg = Reg;
+ MFI->setScratchRSrcReg(ScratchRsrcReg);
+ break;
+ }
+ }
+ }
+
+ if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Skip the last 2 elements because the last one is reserved for VCC, and
+ // this is the 2nd to last element already.
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+ for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
+ // Pick the first unallocated SGPR. Be careful not to pick an alias of the
+ // scratch descriptor, since we haven't added its uses yet.
+ if (!MRI.isPhysRegUsed(Reg)) {
+ assert(MRI.isAllocatable(Reg) &&
+ !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
+
+ MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ ScratchWaveOffsetReg = Reg;
+ MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ break;
+ }
+ }
+ }
+
+
+ assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
+
+ const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL;
+
+ if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+ // Make sure we emit the copy for the offset first. We may have chosen to copy
+ // the buffer resource into a register that aliases the input offset register.
+ BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg)
+ .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+ }
+
+ if (ST.isAmdHsaOS()) {
+ // Insert copies from argument register.
+ assert(
+ !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) &&
+ !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg));
+
+ unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+ unsigned Rsrc23 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2_sub3);
+
+ unsigned Lo = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub0_sub1);
+ unsigned Hi = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub2_sub3);
+
+ const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64);
+
+ BuildMI(MBB, I, DL, SMovB64, Rsrc01)
+ .addReg(Lo, RegState::Kill);
+ BuildMI(MBB, I, DL, SMovB64, Rsrc23)
+ .addReg(Hi, RegState::Kill);
+ } else {
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ // Use relocations to get the pointer, and set up the other bits manually.
+ uint64_t Rsrc23 = TII->getScratchRsrcWords23();
+ BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc2)
+ .addImm(Rsrc23 & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc3)
+ .addImm(Rsrc23 >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ }
+
+ // Make the selected registers live throughout the function.
+ for (MachineBasicBlock &OtherBB : MF) {
+ if (&OtherBB == &MBB)
+ continue;
+
+ OtherBB.addLiveIn(ScratchRsrcReg);
+ OtherBB.addLiveIn(ScratchWaveOffsetReg);
+ }
+}
+
+void SIFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF,
+ RegScavenger *RS) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ if (!MFI->hasStackObjects())
+ return;
+
+ bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects();
+
+ assert((RS || !MayNeedScavengingEmergencySlot) &&
+ "RegScavenger required if spilling");
+
+ if (MayNeedScavengingEmergencySlot) {
+ int ScavengeFI = MFI->CreateSpillStackObject(
+ AMDGPU::SGPR_32RegClass.getSize(),
+ AMDGPU::SGPR_32RegClass.getAlignment());
+ RS->addScavengingFrameIndex(ScavengeFI);
+ }
+}
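(Aside on emitPrologue above: the tentatively reserved scratch registers are shifted down to the first SGPR that is not preloaded, not otherwise used, and not in the reserved tail near VCC. A standalone sketch of that selection loop; the register list, counts, and "used" set are invented for the demo and do not reflect real SGPR numbering.)

```cpp
// Walk the SGPR list, skip the preloaded entries and a reserved tail, and
// take the first register that is not otherwise used.
#include <cstdio>
#include <set>
#include <vector>

int main() {
  std::vector<int> AllSGPRs;
  for (int i = 0; i < 104; ++i)
    AllSGPRs.push_back(i);

  size_t NumPreloaded = 6;       // stand-in for MFI->getNumPreloadedSGPRs()
  size_t ReservedTail = 6;       // stand-in for drop_back(6)
  std::set<int> PhysRegUsed = {6, 7, 8};

  int Chosen = -1;
  for (size_t i = NumPreloaded; i + ReservedTail < AllSGPRs.size(); ++i) {
    if (!PhysRegUsed.count(AllSGPRs[i])) {   // !MRI.isPhysRegUsed(Reg)
      Chosen = AllSGPRs[i];                  // MRI.replaceRegWith(...)
      break;
    }
  }
  std::printf("scratch wave offset register: s%d\n", Chosen);
}
```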
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h
new file mode 100644
index 0000000..a9152fd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -0,0 +1,34 @@
+//===--------------------- SIFrameLowering.h --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
+#define LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
+
+#include "AMDGPUFrameLowering.h"
+
+namespace llvm {
+
+class SIFrameLowering final : public AMDGPUFrameLowering {
+public:
+ SIFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+ unsigned TransAl = 1) :
+ AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
+ ~SIFrameLowering() override {}
+
+ void emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
+
+ void processFunctionBeforeFrameFinalized(
+ MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index c2db9ff..0e043cb 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -20,6 +20,7 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUDiagnosticInfoUnsupported.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
@@ -51,6 +52,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass);
+
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
@@ -103,6 +107,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
@@ -155,13 +160,30 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
+
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
+
+ setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
+
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+ setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
+
setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);
+
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);
+
+ setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
@@ -173,9 +195,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand);
+
+
+ setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -186,6 +213,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
case ISD::INSERT_VECTOR_ELT:
case ISD::INSERT_SUBVECTOR:
case ISD::EXTRACT_SUBVECTOR:
+ case ISD::SCALAR_TO_VECTOR:
break;
case ISD::CONCAT_VECTORS:
setOperationAction(Op, VT, Custom);
@@ -197,6 +225,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
}
}
+ // Most operations are naturally 32-bit vector operations. We only support
+ // load and store of i64 vectors, so promote v2i64 vector operations to v4i32.
+ for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
+ AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v4i32);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32);
+ }
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
@@ -261,6 +305,41 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}
+bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
+ // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
+ // additionally can do r + r + i with addr64. 32-bit has more addressing
+ // mode options. Depending on the resource constant, it can also do
+ // (i64 r0) + (i32 r1) * (i14 i).
+ //
+ // Private arrays end up using a scratch buffer most of the time, so also
+ // assume those use MUBUF instructions. Scratch loads / stores are currently
+ // implemented as mubuf instructions with offen bit set, so slightly
+ // different than the normal addr64.
+ if (!isUInt<12>(AM.BaseOffs))
+ return false;
+
+ // FIXME: Since we can split the immediate into soffset and immediate offset,
+ // would it make sense to allow any immediate?
+
+ switch (AM.Scale) {
+ case 0: // r + i or just i, depending on HasBaseReg.
+ return true;
+ case 1:
+ return true; // We have r + r or r + i.
+ case 2:
+ if (AM.HasBaseReg) {
+ // Reject 2 * r + r.
+ return false;
+ }
+
+ // Allow 2 * r as r + r
+ // Or 2 * r + i is allowed as r + r + i.
+ return true;
+ default: // Don't allow n * r
+ return false;
+ }
+}
+
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
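(Aside on isLegalMUBUFAddressingMode above: the offset must fit in 12 unsigned bits, scales 0 and 1 are always accepted, scale 2 only without an extra base register, and larger scales are rejected. A standalone sketch of that check; the AddrMode struct here is a local stand-in, not the TargetLowering type.)

```cpp
// Legality check for MUBUF-style addressing: 12-bit unsigned offset,
// r + i / r + r allowed, 2*r only when it can be re-expressed as r + r.
#include <cstdint>
#include <cstdio>

struct AddrMode { int64_t BaseOffs; bool HasBaseReg; int64_t Scale; };

static bool isLegalMUBUFLikeMode(const AddrMode &AM) {
  if (AM.BaseOffs < 0 || AM.BaseOffs >= (1 << 12)) // !isUInt<12>(AM.BaseOffs)
    return false;
  switch (AM.Scale) {
  case 0:
  case 1:
    return true;            // i, r + i, r + r
  case 2:
    return !AM.HasBaseReg;  // 2*r is r + r, but 2*r + r is not encodable
  default:
    return false;           // no n*r addressing
  }
}

int main() {
  std::printf("%d\n", isLegalMUBUFLikeMode({4092, true, 1}));  // 1
  std::printf("%d\n", isLegalMUBUFLikeMode({4096, true, 1}));  // 0
  std::printf("%d\n", isLegalMUBUFLikeMode({0, true, 2}));     // 0
}
```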
@@ -269,7 +348,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::GLOBAL_ADDRESS: {
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Assume the we will use FLAT for all global memory accesses
// on VI.
@@ -282,51 +361,51 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
- // fall-through
- case AMDGPUAS::PRIVATE_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
- // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
- // additionally can do r + r + i with addr64. 32-bit has more addressing
- // mode options. Depending on the resource constant, it can also do
- // (i64 r0) + (i32 r1) * (i14 i).
- //
- // SMRD instructions have an 8-bit, dword offset.
- //
- // Assume nonunifom access, since the address space isn't enough to know
- // what instruction we will use, and since we don't know if this is a load
- // or store and scalar stores are only available on VI.
- //
- // We also know if we are doing an extload, we can't do a scalar load.
- //
- // Private arrays end up using a scratch buffer most of the time, so also
- // assume those use MUBUF instructions. Scratch loads / stores are currently
- // implemented as mubuf instructions with offen bit set, so slightly
- // different than the normal addr64.
- if (!isUInt<12>(AM.BaseOffs))
- return false;
- // FIXME: Since we can split immediate into soffset and immediate offset,
- // would it make sense to allow any immediate?
+ return isLegalMUBUFAddressingMode(AM);
+ }
+ case AMDGPUAS::CONSTANT_ADDRESS: {
+ // If the offset isn't a multiple of 4, it probably isn't going to be
+ // correctly aligned.
+ if (AM.BaseOffs % 4 != 0)
+ return isLegalMUBUFAddressingMode(AM);
+
+ // There are no SMRD extloads, so if we have to do a small type access we
+ // will use a MUBUF load.
+ // FIXME?: We also need to do this if unaligned, but we don't know the
+ // alignment here.
+ if (DL.getTypeStoreSize(Ty) < 4)
+ return isLegalMUBUFAddressingMode(AM);
+
+ if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+ // SMRD instructions have an 8-bit, dword offset on SI.
+ if (!isUInt<8>(AM.BaseOffs / 4))
+ return false;
+ } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
+ // On CI+, this can also be a 32-bit literal constant offset. If it fits
+ // in 8-bits, it can use a smaller encoding.
+ if (!isUInt<32>(AM.BaseOffs / 4))
+ return false;
+ } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // On VI, these use the SMEM format and the offset is 20-bit in bytes.
+ if (!isUInt<20>(AM.BaseOffs))
+ return false;
+ } else
+ llvm_unreachable("unhandled generation");
- switch (AM.Scale) {
- case 0: // r + i or just i, depending on HasBaseReg.
+ if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
- case 1:
- return true; // We have r + r or r + i.
- case 2:
- if (AM.HasBaseReg) {
- // Reject 2 * r + r.
- return false;
- }
- // Allow 2 * r as r + r
- // Or 2 * r + i is allowed as r + r + i.
+ if (AM.Scale == 1 && AM.HasBaseReg)
return true;
- default: // Don't allow n * r
- return false;
- }
+
+ return false;
}
+
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
+ return isLegalMUBUFAddressingMode(AM);
+
case AMDGPUAS::LOCAL_ADDRESS:
case AMDGPUAS::REGION_ADDRESS: {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
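(Aside on the constant-address case above: SMRD offsets are generation dependent, an 8-bit dword offset on SI, a 32-bit literal dword offset on CI, and a 20-bit byte offset on VI, with misaligned or small accesses falling back to MUBUF. A standalone sketch of that per-generation check with a local Gen enum.)

```cpp
// Per-generation SMRD offset legality, mirroring the checks added above.
#include <cstdint>
#include <cstdio>

enum Gen { SI, CI, VI };

static bool isLegalSMRDOffset(Gen G, int64_t ByteOffs) {
  if (ByteOffs < 0 || ByteOffs % 4 != 0)
    return false;                          // real code falls back to MUBUF
  uint64_t DWordOffs = static_cast<uint64_t>(ByteOffs) / 4;
  switch (G) {
  case SI: return DWordOffs < (1u << 8);                          // isUInt<8>
  case CI: return DWordOffs <= 0xffffffffULL;                     // isUInt<32>
  case VI: return static_cast<uint64_t>(ByteOffs) < (1u << 20);   // isUInt<20>
  }
  return false;
}

int main() {
  std::printf("SI offset 1020: %d\n", isLegalSMRDOffset(SI, 1020)); // 1
  std::printf("SI offset 1024: %d\n", isLegalSMRDOffset(SI, 1024)); // 0
  std::printf("VI offset 1024: %d\n", isLegalSMRDOffset(VI, 1024)); // 1
}
```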
@@ -374,7 +453,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte
// aligned, 8 byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
- return Align % 4 == 0;
+ bool AlignedBy4 = (Align % 4 == 0);
+ if (IsFast)
+ *IsFast = AlignedBy4;
+ return AlignedBy4;
}
// Smaller than dword value must be aligned.
@@ -411,6 +493,32 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
+static bool isFlatGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
+}
+
+bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
+}
+
+
+bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
+ const MemSDNode *MemNode = cast<MemSDNode>(N);
+ const Value *Ptr = MemNode->getMemOperand()->getValue();
+
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers
+ if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
+ isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
@@ -426,12 +534,6 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return TII->isInlineConstant(Imm);
}
-static EVT toIntegerVT(EVT VT) {
- if (VT.isVector())
- return VT.changeVectorElementTypeToInteger();
- return MVT::getIntegerVT(VT.getSizeInBits());
-}
-
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc SL, SDValue Chain,
unsigned Offset, bool Signed) const {
@@ -439,7 +541,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
- unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
@@ -455,30 +557,10 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
unsigned Align = DL.getABITypeAlignment(Ty);
- if (VT != MemVT && VT.isFloatingPoint()) {
- // Do an integer load and convert.
- // FIXME: This is mostly because load legalization after type legalization
- // doesn't handle FP extloads.
- assert(VT.getScalarType() == MVT::f32 &&
- MemVT.getScalarType() == MVT::f16);
-
- EVT IVT = toIntegerVT(VT);
- EVT MemIVT = toIntegerVT(MemVT);
- SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD,
- IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT,
- false, // isVolatile
- true, // isNonTemporal
- true, // isInvariant
- Align); // Alignment
- SDValue Ops[] = {
- DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load),
- Load.getValue(1)
- };
-
- return DAG.getMergeValues(Ops, SL);
- }
-
ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ if (MemVT.isFloatingPoint())
+ ExtTy = ISD::EXTLOAD;
+
return DAG.getLoad(ISD::UNINDEXED, ExtTy,
VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT,
false, // isVolatile
@@ -497,8 +579,16 @@ SDValue SITargetLowering::LowerFormalArguments(
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+
+ if (Subtarget->isAmdHsaOS() && Info->getShaderType() != ShaderType::COMPUTE) {
+ const Function *Fn = MF.getFunction();
+ DiagnosticInfoUnsupported NoGraphicsHSA(*Fn, "non-compute shaders with HSA");
+ DAG.getContext()->diagnose(NoGraphicsHSA);
+ return SDValue();
+ }
- assert(CallConv == CallingConv::C);
+ // FIXME: We currently assume all calling conventions are kernels.
SmallVector<ISD::InputArg, 16> Splits;
BitVector Skipped(Ins.size());
@@ -513,7 +603,7 @@ SDValue SITargetLowering::LowerFormalArguments(
assert((PSInputNum <= 15) && "Too many PS inputs!");
if (!Arg.Used) {
- // We can savely skip PS inputs
+ // We can safely skip PS inputs
Skipped.set(i);
++PSInputNum;
continue;
@@ -530,7 +620,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// We REALLY want the ORIGINAL number of vertex elements here, e.g. a
// three or five element vertex only needs three or five registers,
- // NOT four or eigth.
+ // NOT four or eight.
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
@@ -556,41 +646,30 @@ SDValue SITargetLowering::LowerFormalArguments(
CCInfo.AllocateReg(AMDGPU::VGPR1);
}
- // The pointer to the list of arguments is stored in SGPR0, SGPR1
- // The pointer to the scratch buffer is stored in SGPR2, SGPR3
- if (Info->getShaderType() == ShaderType::COMPUTE) {
- if (Subtarget->isAmdHsaOS())
- Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
- else
- Info->NumUserSGPRs = 4;
-
- unsigned InputPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
- unsigned InputPtrRegLo =
- TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0);
- unsigned InputPtrRegHi =
- TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1);
-
- unsigned ScratchPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
- unsigned ScratchPtrRegLo =
- TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0);
- unsigned ScratchPtrRegHi =
- TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1);
-
- CCInfo.AllocateReg(InputPtrRegLo);
- CCInfo.AllocateReg(InputPtrRegHi);
- CCInfo.AllocateReg(ScratchPtrRegLo);
- CCInfo.AllocateReg(ScratchPtrRegHi);
- MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
- MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass);
- }
-
if (Info->getShaderType() == ShaderType::COMPUTE) {
getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
Splits);
}
+ // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
+ if (Info->hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ CCInfo.AllocateReg(PrivateSegmentBufferReg);
+ }
+
+ if (Info->hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+ MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
+ CCInfo.AllocateReg(DispatchPtrReg);
+ }
+
+ if (Info->hasKernargSegmentPtr()) {
+ unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+ MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+ CCInfo.AllocateReg(InputPtrReg);
+ }
+
AnalyzeFormalArguments(CCInfo, Splits);
SmallVector<SDValue, 16> Chains;
@@ -617,7 +696,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Offset, Ins[i].Flags.isSExt());
Chains.push_back(Arg.getValue(1));
- const PointerType *ParamTy =
+ auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
@@ -678,10 +757,113 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
- if (Info->getShaderType() != ShaderType::COMPUTE) {
- unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
- AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
- Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
+ // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+ // these from the dispatch pointer.
+
+ // Start adding system SGPRs.
+ if (Info->hasWorkGroupIDX()) {
+ unsigned Reg = Info->addWorkGroupIDX();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ } else
+ llvm_unreachable("work group id x is always enabled");
+
+ if (Info->hasWorkGroupIDY()) {
+ unsigned Reg = Info->addWorkGroupIDY();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasWorkGroupIDZ()) {
+ unsigned Reg = Info->addWorkGroupIDZ();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasWorkGroupInfo()) {
+ unsigned Reg = Info->addWorkGroupInfo();
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasPrivateSegmentWaveByteOffset()) {
+ // Scratch wave offset passed in system SGPR.
+ unsigned PrivateSegmentWaveByteOffsetReg
+ = Info->addPrivateSegmentWaveByteOffset();
+
+ MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+ CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+ }
+
+ // Now that we've figured out where the scratch register inputs are, see if
+ // we should reserve the arguments and use them directly.
+
+ bool HasStackObjects = MF.getFrameInfo()->hasStackObjects();
+
+ if (ST.isAmdHsaOS()) {
+ // TODO: Assume we will spill without optimizations.
+ if (HasStackObjects) {
+ // If we have stack objects, we unquestionably need the private buffer
+ // resource. For the HSA ABI, this will be the first 4 user SGPR
+ // inputs. We can reserve those and use them directly.
+
+ unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ Info->setScratchRSrcReg(PrivateSegmentBufferReg);
+
+ unsigned PrivateSegmentWaveByteOffsetReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ Info->setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
+ } else {
+ unsigned ReservedBufferReg
+ = TRI->reservedPrivateSegmentBufferReg(MF);
+ unsigned ReservedOffsetReg
+ = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+
+ // We tentatively reserve the last registers (skipping the last two
+ // which may contain VCC). After register allocation, we'll replace
+ // these with the ones immediately after those which were really
+ // allocated. In the prologue, copies will be inserted from the argument
+ // to these reserved registers.
+ Info->setScratchRSrcReg(ReservedBufferReg);
+ Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ }
+ } else {
+ unsigned ReservedBufferReg = TRI->reservedPrivateSegmentBufferReg(MF);
+
+ // Without HSA, relocations are used for the scratch pointer and the
+ // buffer resource setup is always inserted in the prologue. Scratch wave
+ // offset is still in an input SGPR.
+ Info->setScratchRSrcReg(ReservedBufferReg);
+
+ if (HasStackObjects) {
+ unsigned ScratchWaveOffsetReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+ Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ } else {
+ unsigned ReservedOffsetReg
+ = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ }
+ }
+
+ if (Info->hasWorkItemIDX()) {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ } else
+ llvm_unreachable("workitem id x should always be enabled");
+
+ if (Info->hasWorkItemIDY()) {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+
+ if (Info->hasWorkItemIDZ()) {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
}
if (Chains.empty())
@@ -693,27 +875,11 @@ SDValue SITargetLowering::LowerFormalArguments(
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
- MachineBasicBlock::iterator I = *MI;
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH:
return BB;
- case AMDGPU::SI_RegisterStorePseudo: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- MachineInstrBuilder MIB =
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
- Reg);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- MIB.addOperand(MI->getOperand(i));
-
- MI->eraseFromParent();
- break;
- }
}
return BB;
}
@@ -944,20 +1110,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
const GlobalValue *GV = GSD->getGlobal();
MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
- SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT);
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
-
- SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr,
- DAG.getConstant(1, DL, MVT::i32));
-
- SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrLo, GA);
- SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue),
- PtrHi, DAG.getConstant(0, DL, MVT::i32),
- SDValue(Lo.getNode(), 1));
- return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
+ return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT, GA);
}
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
@@ -977,6 +1131,18 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
// a glue result.
}
+SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
+ SDValue Op,
+ MVT VT,
+ unsigned Offset) const {
+ SDLoc SL(Op);
+ SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL,
+ DAG.getEntryNode(), Offset, false);
+ // The local size values will have the high 16 bits as zero.
+ return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
+ DAG.getValueType(VT));
+}
+
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -988,7 +1154,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc DL(Op);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ // TODO: Should this propagate fast-math-flags?
+
switch (IntrinsicID) {
+ case Intrinsic::amdgcn_dispatch_ptr:
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
+ TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
+
case Intrinsic::r600_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, false);
@@ -1008,37 +1180,36 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_X, false);
+ return lowerImplicitZextParam(DAG, Op, MVT::i16,
+ SI::KernelInputOffsets::LOCAL_SIZE_X);
case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
+ return lowerImplicitZextParam(DAG, Op, MVT::i16,
+ SI::KernelInputOffsets::LOCAL_SIZE_Y);
case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
-
+ return lowerImplicitZextParam(DAG, Op, MVT::i16,
+ SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::AMDGPU_read_workdim:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- getImplicitParameterOffset(MFI, GRID_DIM), false);
-
+ // Really only 2 bits.
+ return lowerImplicitZextParam(DAG, Op, MVT::i8,
+ getImplicitParameterOffset(MFI, GRID_DIM));
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
case Intrinsic::r600_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
case Intrinsic::r600_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
case Intrinsic::r600_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X), VT);
case Intrinsic::r600_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y), VT);
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
- TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
+ TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z), VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops[] = {
Op.getOperand(1),
@@ -1077,6 +1248,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(2, DL, MVT::i32), // P0
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case AMDGPUIntrinsic::SI_packf16:
+ if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())
+ return DAG.getUNDEF(MVT::i32);
+ return Op;
case AMDGPUIntrinsic::SI_fs_interp: {
SDValue IJ = Op.getOperand(4);
SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
@@ -1092,6 +1267,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case Intrinsic::amdgcn_interp_p1: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
+ SDValue Glue = M0.getValue(1);
+ return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Glue);
+ }
+ case Intrinsic::amdgcn_interp_p2: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
+ SDValue Glue = SDValue(M0.getNode(), 1);
+ return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
+ Glue);
+ }
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@@ -1152,16 +1340,29 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
"Custom lowering for non-i32 vectors hasn't been implemented.");
unsigned NumElements = Op.getValueType().getVectorNumElements();
assert(NumElements != 2 && "v2 loads are supported for all address spaces.");
+
switch (Load->getAddressSpace()) {
default: break;
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ if (isMemOpUniform(Load))
+ break;
+ // Non-uniform loads will be selected to MUBUF instructions, so they
+ // have the same legalization requirements as global and private
+ // loads.
+ //
+ // Fall-through
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::PRIVATE_ADDRESS:
+ if (NumElements >= 8)
+ return SplitVectorLoad(Op, DAG);
+
// v4 loads are supported for private and global memory.
if (NumElements <= 4)
break;
// fall-through
case AMDGPUAS::LOCAL_ADDRESS:
- return ScalarizeVectorLoad(Op, DAG);
+ // If properly aligned, splitting may let us use ds_read_b64.
+ return SplitVectorLoad(Op, DAG);
}
}
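(Aside on the LowerLOAD change above: wide vector loads are now split in half rather than scalarized, since an aligned half can still be served by ds_read_b64 or a wide buffer load. A standalone sketch of the halving step on a plain vector of elements.)

```cpp
// Cut a too-wide vector load into two halves instead of scalarizing it.
#include <cstdio>
#include <utility>
#include <vector>

static std::pair<std::vector<int>, std::vector<int>>
splitLoad(const std::vector<int> &Elts) {
  size_t Half = Elts.size() / 2;
  return {std::vector<int>(Elts.begin(), Elts.begin() + Half),
          std::vector<int>(Elts.begin() + Half, Elts.end())};
}

int main() {
  std::vector<int> V8 = {0, 1, 2, 3, 4, 5, 6, 7};
  if (V8.size() > 4) {                 // NumElements >= 8 -> SplitVectorLoad
    auto Halves = splitLoad(V8);
    std::printf("split into %zu + %zu elements\n",
                Halves.first.size(), Halves.second.size());
  }
}
```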
@@ -1236,8 +1437,10 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
if (Unsafe) {
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
- return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip);
+ return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags);
}
return SDValue();
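(Aside on LowerFastFDIV above: under unsafe-algebra the division x / y is rewritten as x * (1.0 / y), with the FMUL carrying the unsafe-algebra flag. A standalone sketch of the same rewrite in plain C++, which shows the transform but not the hardware RCP instruction.)

```cpp
// Reciprocal-multiply rewrite; valid only when fast-math rounding slack is
// acceptable, which is why the real lowering checks the Unsafe flag.
#include <cstdio>

static float fastDiv(float X, float Y) {
  float Recip = 1.0f / Y;  // AMDGPUISD::RCP in the real lowering
  return X * Recip;        // FMUL carrying the unsafe-algebra flag
}

int main() {
  std::printf("exact: %.9g\n", 10.0f / 3.0f);
  std::printf("fast : %.9g\n", fastDiv(10.0f, 3.0f));
}
```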
@@ -1274,6 +1477,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+ // TODO: Should this propagate fast-math-flags?
+
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
@@ -1379,7 +1584,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Ret;
if (VT.isVector() && VT.getVectorNumElements() >= 8)
- return ScalarizeVectorStore(Op, DAG);
+ return SplitVectorStore(Op, DAG);
if (VT == MVT::i1)
return DAG.getTruncStore(Store->getChain(), DL,
@@ -1393,6 +1598,7 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
+ // TODO: Should this propagate fast-math-flags?
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
DAG.getNode(ISD::FMUL, DL, VT, Arg,
DAG.getConstantFP(0.5/M_PI, DL,
@@ -2125,9 +2331,14 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
- TII->legalizeOperands(MI);
- if (TII->isMIMG(MI->getOpcode())) {
+ if (TII->isVOP3(MI->getOpcode())) {
+ // Make sure constant bus requirements are respected.
+ TII->legalizeOperandsVOP3(MRI, MI);
+ return;
+ }
+
+ if (TII->isMIMG(*MI)) {
unsigned VReg = MI->getOperand(0).getReg();
unsigned Writemask = MI->getOperand(1).getImm();
unsigned BitsSet = 0;
@@ -2169,53 +2380,38 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const {
const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-#if 1
- // XXX - Workaround for moveToVALU not handling different register class
- // inserts for REG_SEQUENCE.
-
- // Build the half of the subregister with the constants.
- const SDValue Ops0[] = {
- DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
- buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
- };
-
- SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::v2i32, Ops0), 0);
-
- // Combine the constants and the pointer.
- const SDValue Ops1[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
- Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
- SubRegHi,
- DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
- };
+ static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+
+ // Build the half of the subregister with the constants before building the
+ // full 128-bit register. If we are building multiple resource descriptors,
+ // this will allow CSEing of the 2-component register.
+ const SDValue Ops0[] = {
+ DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
+ buildSMovImm32(DAG, DL, 0),
+ DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
+ buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
+ DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
+ };
- return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
-#else
- const SDValue Ops[] = {
- DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32),
- Ptr,
- DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
- buildSMovImm32(DAG, DL, 0),
- DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
- buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32),
- DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
- };
+ SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, Ops0), 0);
- return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
+ // Combine the constants and the pointer.
+ const SDValue Ops1[] = {
+ DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32),
+ Ptr,
+ DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
+ SubRegHi,
+ DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
+ };
-#endif
+ return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
}
/// \brief Return a resource descriptor with the 'Add TID' bit enabled
-/// The TID (Thread ID) is multipled by the stride value (bits [61:48]
-/// of the resource descriptor) to create an offset, which is added to the
-/// resource ponter.
+/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
+/// of the resource descriptor) to create an offset, which is added to
+/// the resource pointer.
MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr,
@@ -2248,15 +2444,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
}
-MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
-}
-
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
@@ -2274,13 +2461,41 @@ std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- if (Constraint == "r") {
- switch(VT.SimpleTy) {
- default: llvm_unreachable("Unhandled type for 'r' inline asm constraint");
- case MVT::i64:
- return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
- case MVT::i32:
+
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 's':
+ case 'r':
+ switch (VT.getSizeInBits()) {
+ default:
+ return std::make_pair(0U, nullptr);
+ case 32:
return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+ case 64:
+ return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ case 128:
+ return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
+ case 256:
+ return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+ }
+
+ case 'v':
+ switch (VT.getSizeInBits()) {
+ default:
+ return std::make_pair(0U, nullptr);
+ case 32:
+ return std::make_pair(0U, &AMDGPU::VGPR_32RegClass);
+ case 64:
+ return std::make_pair(0U, &AMDGPU::VReg_64RegClass);
+ case 96:
+ return std::make_pair(0U, &AMDGPU::VReg_96RegClass);
+ case 128:
+ return std::make_pair(0U, &AMDGPU::VReg_128RegClass);
+ case 256:
+ return std::make_pair(0U, &AMDGPU::VReg_256RegClass);
+ case 512:
+ return std::make_pair(0U, &AMDGPU::VReg_512RegClass);
+ }
}
}
@@ -2301,3 +2516,16 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
+
+SITargetLowering::ConstraintType
+SITargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 's':
+ case 'v':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index d84c32e..e2f8cb1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -28,6 +28,9 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
+ SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
+ MVT VT, unsigned Offset) const;
+
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
@@ -57,6 +60,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
+ bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
public:
SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);
@@ -76,6 +80,9 @@ public:
bool MemcpyStrSrc,
MachineFunction &MF) const override;
+ bool isMemOpUniform(const SDNode *N) const;
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
@@ -112,13 +119,10 @@ public:
SDValue Ptr,
uint32_t RsrcDword1,
uint64_t RsrcDword2And3) const;
- MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const;
-
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 90a37f1..821aada 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -91,7 +91,8 @@ private:
bool isOpRelevant(MachineOperand &Op);
/// \brief Get register interval an operand affects.
- RegInterval getRegInterval(MachineOperand &Op);
+ RegInterval getRegInterval(const TargetRegisterClass *RC,
+ const MachineOperand &Reg) const;
/// \brief Handle instructions async components
void pushInstruction(MachineBasicBlock &MBB,
@@ -121,9 +122,13 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
const char *getPassName() const override {
- return "SI insert wait instructions";
+ return "SI insert wait instructions";
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
@@ -138,9 +143,8 @@ FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
}
Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
-
- uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags;
- Counters Result;
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ Counters Result = { { 0, 0, 0 } };
Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
@@ -151,15 +155,22 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
// LGKM may use larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
- if (TII->isSMRD(MI.getOpcode())) {
-
- MachineOperand &Op = MI.getOperand(0);
- assert(Op.isReg() && "First LGKM operand must be a register!");
-
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
- Result.Named.LGKM = Size > 4 ? 2 : 1;
-
+ if (TII->isSMRD(MI)) {
+
+ if (MI.getNumOperands() != 0) {
+ assert(MI.getOperand(0).isReg() &&
+ "First LGKM operand must be a register!");
+
+ // XXX - What if this is a write into a super register?
+ const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0);
+ unsigned Size = RC->getSize();
+ Result.Named.LGKM = Size > 4 ? 2 : 1;
+ } else {
+ // s_dcache_inv etc. do not have a destination register. Assume we
+ // want a wait on these.
+ // XXX - What is the right value?
+ Result.Named.LGKM = 1;
+ }
} else {
// DS
Result.Named.LGKM = 1;
@@ -173,9 +184,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
}
bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
-
// Constants are always irrelevant
- if (!Op.isReg())
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
return false;
// Defines are always relevant
@@ -196,7 +206,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
// operand comes before the value operand and it may have
// multiple data operands.
- if (TII->isDS(MI.getOpcode())) {
+ if (TII->isDS(MI)) {
MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
if (Data && Op.isIdenticalTo(*Data))
return true;
@@ -224,18 +234,13 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
return false;
}
-RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
-
- if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
- return std::make_pair(0, 0);
-
- unsigned Reg = Op.getReg();
- unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize();
-
+RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
+ const MachineOperand &Reg) const {
+ unsigned Size = RC->getSize();
assert(Size >= 4);
RegInterval Result;
- Result.first = TRI->getEncodingValue(Reg);
+ Result.first = TRI->getEncodingValue(Reg.getReg());
Result.second = Result.first + Size / 4;
return Result;
@@ -246,10 +251,13 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// Get the hardware counter increments and sum them up
Counters Increment = getHwCounts(*I);
+ Counters Limit = ZeroCounts;
unsigned Sum = 0;
for (unsigned i = 0; i < 3; ++i) {
LastIssued.Array[i] += Increment.Array[i];
+ if (Increment.Array[i])
+ Limit.Array[i] = LastIssued.Array[i];
Sum += Increment.Array[i];
}
@@ -261,7 +269,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
if (MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() >=
AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
+ // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
// or SMEM clause, respectively.
//
// The temporary workaround is to break the clauses with S_NOP.
@@ -270,7 +278,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// and destination registers don't overlap, e.g. this is illegal:
// r0 = load r2
// r2 = load r0
- if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
+ if ((LastOpcodeType == SMEM && TII->isSMRD(*I)) ||
(LastOpcodeType == VMEM && Increment.Named.VM)) {
// Insert a NOP to break the clause.
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
@@ -278,7 +286,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
LastInstWritesM0 = false;
}
- if (TII->isSMRD(I->getOpcode()))
+ if (TII->isSMRD(*I))
LastOpcodeType = SMEM;
else if (Increment.Named.VM)
LastOpcodeType = VMEM;
@@ -290,21 +298,21 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
}
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
-
MachineOperand &Op = I->getOperand(i);
if (!isOpRelevant(Op))
continue;
- RegInterval Interval = getRegInterval(Op);
+ const TargetRegisterClass *RC = TII->getOpRegClass(*I, i);
+ RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
// Remember which registers we define
if (Op.isDef())
- DefinedRegs[j] = LastIssued;
+ DefinedRegs[j] = Limit;
// and which one we are using
if (Op.isUse())
- UsedRegs[j] = LastIssued;
+ UsedRegs[j] = Limit;
}
}
}
@@ -390,12 +398,18 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
if (MI.getOpcode() == AMDGPU::S_SENDMSG)
return LastIssued;
- // For each register affected by this
- // instruction increase the result sequence
+ // For each register affected by this instruction increase the result
+ // sequence.
+ //
+ // TODO: We could probably just look at explicit operands if we removed VCC /
+ // EXEC from SMRD dest reg classes.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-
MachineOperand &Op = MI.getOperand(i);
- RegInterval Interval = getRegInterval(Op);
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()))
+ continue;
+
+ const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
+ RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
if (Op.isDef()) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 211666a..0e883f6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -41,6 +41,10 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> WQM = 0;
field bits<1> VGPRSpill = 0;
+ // This bit tells the assembler to use the 32-bit encoding in case it
+ // is unable to infer the encoding from the operands.
+ field bits<1> VOPAsmPrefer32Bit = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@@ -68,10 +72,8 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{19} = FLAT;
let TSFlags{20} = WQM;
let TSFlags{21} = VGPRSpill;
+ let TSFlags{22} = VOPAsmPrefer32Bit;
- // Most instructions require adjustments after selection to satisfy
- // operand requirements.
- let hasPostISelHook = 1;
let SchedRW = [Write32Bit];
}
@@ -86,7 +88,6 @@ class Enc64 {
}
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
-def VOPDstVCC : VOPDstOperand <VCCReg>;
let Uses = [EXEC] in {
@@ -101,11 +102,11 @@ class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> {
+ VOPAnyCommon <(outs), ins, asm, pattern> {
- let DisableEncoding = "$dst";
let VOPC = 1;
let Size = 4;
+ let Defs = [VCC];
}
class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
@@ -138,6 +139,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let isCodeGenOnly = 0;
int Size = 8;
+
+ // Because SGPRs may be allowed if there are multiple operands, we
+ // need a post-isel hook to insert copies in order to avoid
+ // violating constant bus requirements.
+ let hasPostISelHook = 1;
}
} // End Uses = [EXEC]
@@ -222,6 +228,20 @@ class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
let Inst{31-27} = 0x18; //encoding
}
+class SMRD_IMMe_ci <bits<5> op> : Enc64 {
+ bits<7> sdst;
+ bits<7> sbase;
+ bits<32> offset;
+
+ let Inst{7-0} = 0xff;
+ let Inst{8} = 0;
+ let Inst{14-9} = sbase{6-1};
+ let Inst{21-15} = sdst;
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+ let Inst{63-32} = offset;
+}
+
let SchedRW = [WriteSALU] in {
class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI<outs, ins, asm, pattern> {
@@ -249,13 +269,13 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
InstSI<outs, ins, asm, pattern>, SOPCe <op> {
- let DisableEncoding = "$dst";
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let SALU = 1;
let SOPC = 1;
let isCodeGenOnly = 0;
+ let Defs = [SCC];
let UseNamedOperandTable = 1;
}
@@ -598,15 +618,13 @@ class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
// Vector I/O operations
//===----------------------------------------------------------------------===//
-let Uses = [EXEC] in {
-
class DS <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
let LGKM_CNT = 1;
let DS = 1;
let UseNamedOperandTable = 1;
- let Uses = [M0];
+ let Uses = [M0, EXEC];
// Most instructions load and store data, so set this as the default.
let mayLoad = 1;
@@ -623,6 +641,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
let MUBUF = 1;
+ let Uses = [EXEC];
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
@@ -636,6 +655,7 @@ class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
let MTBUF = 1;
+ let Uses = [EXEC];
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
@@ -665,9 +685,7 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let VM_CNT = 1;
let EXP_CNT = 1;
let MIMG = 1;
+ let Uses = [EXEC];
let hasSideEffects = 0; // XXX ????
}
-
-
-} // End Uses = [EXEC]
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index cfd2c42..a08a5a8 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -82,6 +82,7 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
switch (MI->getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO:
return true;
default:
return false;
@@ -204,7 +205,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
unsigned &Offset,
const TargetRegisterInfo *TRI) const {
unsigned Opc = LdSt->getOpcode();
- if (isDS(Opc)) {
+
+ if (isDS(*LdSt)) {
const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
AMDGPU::OpName::offset);
if (OffsetImm) {
@@ -254,7 +256,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
return false;
}
- if (isMUBUF(Opc) || isMTBUF(Opc)) {
+ if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) {
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
return false;
@@ -270,7 +272,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
return true;
}
- if (isSMRD(Opc)) {
+ if (isSMRD(*LdSt)) {
const MachineOperand *OffsetImm = getNamedOperand(*LdSt,
AMDGPU::OpName::offset);
if (!OffsetImm)
@@ -289,20 +291,18 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
MachineInstr *SecondLdSt,
unsigned NumLoads) const {
- unsigned Opc0 = FirstLdSt->getOpcode();
- unsigned Opc1 = SecondLdSt->getOpcode();
-
// TODO: This needs finer tuning
if (NumLoads > 4)
return false;
- if (isDS(Opc0) && isDS(Opc1))
+ if (isDS(*FirstLdSt) && isDS(*SecondLdSt))
return true;
- if (isSMRD(Opc0) && isSMRD(Opc1))
+ if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt))
return true;
- if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1)))
+ if ((isMUBUF(*FirstLdSt) || isMTBUF(*FirstLdSt)) &&
+ (isMUBUF(*SecondLdSt) || isMTBUF(*SecondLdSt)))
return true;
return false;
@@ -323,28 +323,45 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ };
+
+ static const int16_t Sub0_15_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+ AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
};
static const int16_t Sub0_7[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ };
+
+ static const int16_t Sub0_7_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
};
static const int16_t Sub0_3[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ };
+
+ static const int16_t Sub0_3_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
};
static const int16_t Sub0_2[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
};
static const int16_t Sub0_1[] = {
- AMDGPU::sub0, AMDGPU::sub1, 0
+ AMDGPU::sub0, AMDGPU::sub1,
};
unsigned Opcode;
- const int16_t *SubIndices;
+ ArrayRef<int16_t> SubIndices;
+ bool Forward;
if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
@@ -360,7 +377,7 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
.addImm(0)
.addReg(SrcReg, getKillRegState(KillSrc));
}
@@ -375,18 +392,18 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_3;
+ Opcode = AMDGPU::S_MOV_B64;
+ SubIndices = Sub0_3_64;
} else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_7;
+ Opcode = AMDGPU::S_MOV_B64;
+ SubIndices = Sub0_7_64;
} else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B32;
- SubIndices = Sub0_15;
+ Opcode = AMDGPU::S_MOV_B64;
+ SubIndices = Sub0_15_64;
} else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
@@ -428,13 +445,27 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("Can't copy register!");
}
- while (unsigned SubIdx = *SubIndices++) {
+ if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg))
+ Forward = true;
+ else
+ Forward = false;
+
+ for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
+ unsigned SubIdx;
+ if (Forward)
+ SubIdx = SubIndices[Idx];
+ else
+ SubIdx = SubIndices[SubIndices.size() - Idx - 1];
+
MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
get(Opcode), RI.getSubReg(DestReg, SubIdx));
- Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc));
+ Builder.addReg(RI.getSubReg(SrcReg, SubIdx));
+
+ if (Idx == SubIndices.size() - 1)
+ Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit);
- if (*SubIndices)
+ if (Idx == 0)
Builder.addReg(DestReg, RegState::Define | RegState::Implicit);
}
}
@@ -471,6 +502,40 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
return AMDGPU::COPY;
}
+static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_S32_SAVE;
+ case 8:
+ return AMDGPU::SI_SPILL_S64_SAVE;
+ case 16:
+ return AMDGPU::SI_SPILL_S128_SAVE;
+ case 32:
+ return AMDGPU::SI_SPILL_S256_SAVE;
+ case 64:
+ return AMDGPU::SI_SPILL_S512_SAVE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_V32_SAVE;
+ case 8:
+ return AMDGPU::SI_SPILL_V64_SAVE;
+ case 16:
+ return AMDGPU::SI_SPILL_V128_SAVE;
+ case 32:
+ return AMDGPU::SI_SPILL_V256_SAVE;
+ case 64:
+ return AMDGPU::SI_SPILL_V512_SAVE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
@@ -481,47 +546,83 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- int Opcode = -1;
+
+ unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+ unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
+ MachineMemOperand *MMO
+ = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ Size, Align);
if (RI.isSGPRClass(RC)) {
+ MFI->setHasSpilledSGPRs();
+
// We are only allowed to create one new instruction when spilling
// registers, so we need to use a pseudo instruction for spilling
// SGPRs.
- switch (RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
- }
- } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
- MFI->setHasSpilledVGPRs();
-
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
- case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break;
- }
+ unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
+ BuildMI(MBB, MI, DL, get(Opcode))
+ .addReg(SrcReg) // src
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addMemOperand(MMO);
+
+ return;
}
- if (Opcode != -1) {
- FrameInfo->setObjectAlignment(FrameIndex, 4);
- BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else {
+ if (!ST.isVGPRSpillingEnabled(MFI)) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
" spill register");
BuildMI(MBB, MI, DL, get(AMDGPU::KILL))
- .addReg(SrcReg);
+ .addReg(SrcReg);
+
+ return;
+ }
+
+ assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
+
+ unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
+ MFI->setHasSpilledVGPRs();
+ BuildMI(MBB, MI, DL, get(Opcode))
+ .addReg(SrcReg) // src
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addMemOperand(MMO);
+}
+
+static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_S32_RESTORE;
+ case 8:
+ return AMDGPU::SI_SPILL_S64_RESTORE;
+ case 16:
+ return AMDGPU::SI_SPILL_S128_RESTORE;
+ case 32:
+ return AMDGPU::SI_SPILL_S256_RESTORE;
+ case 64:
+ return AMDGPU::SI_SPILL_S512_RESTORE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_V32_RESTORE;
+ case 8:
+ return AMDGPU::SI_SPILL_V64_RESTORE;
+ case 16:
+ return AMDGPU::SI_SPILL_V128_RESTORE;
+ case 32:
+ return AMDGPU::SI_SPILL_V256_RESTORE;
+ case 64:
+ return AMDGPU::SI_SPILL_V512_RESTORE;
+ default:
+ llvm_unreachable("unknown register size");
}
}
@@ -534,42 +635,43 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- int Opcode = -1;
-
- if (RI.isSGPRClass(RC)){
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
- }
- } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) {
- switch(RC->getSize() * 8) {
- case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break;
- case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break;
- case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break;
- case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break;
- case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break;
- case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break;
- }
- }
+ unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
+ unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, FrameIndex);
- if (Opcode != -1) {
- FrameInfo->setObjectAlignment(FrameIndex, 4);
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, Size, Align);
+
+ if (RI.isSGPRClass(RC)) {
+ // FIXME: Maybe this should not include a memoperand because it will be
+ // lowered to non-memory instructions.
+ unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex)
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addMemOperand(MMO);
- } else {
+ return;
+ }
+
+ if (!ST.isVGPRSpillingEnabled(MFI)) {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
" restore register");
BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg);
+
+ return;
}
+
+ assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
+
+ unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
+ BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ .addFrameIndex(FrameIndex) // frame_idx
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addMemOperand(MMO);
}
/// \param @Offset Offset in bytes of the FrameIndex being spilled
@@ -601,17 +703,21 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
if (MFI->getShaderType() == ShaderType::COMPUTE &&
WorkGroupSize > WavefrontSize) {
- unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X);
- unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y);
- unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z);
+ unsigned TIDIGXReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X);
+ unsigned TIDIGYReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y);
+ unsigned TIDIGZReg
+ = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z);
unsigned InputPtrReg =
- TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR);
+ TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) {
if (!Entry.isLiveIn(Reg))
Entry.addLiveIn(Reg);
}
RS->enterBasicBlock(&Entry);
+ // FIXME: Can we scavenge an SReg_64 and access the subregs?
unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0)
@@ -667,8 +773,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
return TmpReg;
}
-void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
- int Count) const {
+void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI,
+ int Count) const {
while (Count > 0) {
int Arg;
if (Count >= 8)
@@ -687,26 +793,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
switch (MI->getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
- case AMDGPU::SI_CONSTDATA_PTR: {
- unsigned Reg = MI->getOperand(0).getReg();
- unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg);
-
- // Add 32-bit offset from this instruction to the start of the constant data.
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo)
- .addReg(RegLo)
- .addTargetIndex(AMDGPU::TI_CONSTDATA_START)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit);
- BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi)
- .addReg(RegHi)
- .addImm(0)
- .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit)
- .addReg(AMDGPU::SCC, RegState::Implicit);
- MI->eraseFromParent();
- break;
- }
case AMDGPU::SGPR_USE:
// This is just a placeholder for register allocation.
MI->eraseFromParent();
@@ -760,49 +846,90 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MI->eraseFromParent();
break;
}
+
+ case AMDGPU::SI_CONSTDATA_PTR: {
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
+ MachineFunction &MF = *MBB.getParent();
+ unsigned Reg = MI->getOperand(0).getReg();
+ unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);
+
+ // Create a bundle so these instructions won't be re-ordered by the
+ // post-RA scheduler.
+ MIBundleBuilder Bundler(MBB, MI);
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg));
+
+ // Add 32-bit offset from this instruction to the start of the
+ // constant data.
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
+ .addReg(RegLo)
+ .addOperand(MI->getOperand(1)));
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi)
+ .addImm(0));
+
+ llvm::finalizeBundle(MBB, Bundler.begin());
+
+ MI->eraseFromParent();
+ break;
+ }
}
return true;
}
-MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
- bool NewMI) const {
-
- if (MI->getNumOperands() < 3)
- return nullptr;
-
+/// Commutes the operands in the given instruction.
+/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
+///
+/// Do not call this method for a non-commutable instruction or for
+/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
+/// Even though the instruction is commutable, the method may still
+/// fail to commute the operands, null pointer is returned in such cases.
+MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx0,
+ unsigned OpIdx1) const {
int CommutedOpcode = commuteOpcode(*MI);
if (CommutedOpcode == -1)
return nullptr;
int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src0);
- assert(Src0Idx != -1 && "Should always have src0 operand");
-
MachineOperand &Src0 = MI->getOperand(Src0Idx);
if (!Src0.isReg())
return nullptr;
int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::src1);
- if (Src1Idx == -1)
+
+ if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
+ OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
+ (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
+ OpIdx1 != static_cast<unsigned>(Src0Idx)))
return nullptr;
MachineOperand &Src1 = MI->getOperand(Src1Idx);
- // Make sure it's legal to commute operands for VOP2.
- if (isVOP2(MI->getOpcode()) &&
- (!isOperandLegal(MI, Src0Idx, &Src1) ||
- !isOperandLegal(MI, Src1Idx, &Src0))) {
- return nullptr;
+
+ if (isVOP2(*MI)) {
+ const MCInstrDesc &InstrDesc = MI->getDesc();
+ // For VOP2 instructions, any operand type is valid to use for src0. Make
+ // sure we can use src1 as src0.
+ //
+ // We could be stricter here and only allow commuting if there is a reason
+ // to do so. i.e. if both operands are VGPRs there is no real benefit,
+ // although MachineCSE attempts to find matches by commuting.
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
+ return nullptr;
}
if (!Src1.isReg()) {
// Allow commuting instructions with Imm operands.
if (NewMI || !Src1.isImm() ||
- (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
+ (!isVOP2(*MI) && !isVOP3(*MI))) {
return nullptr;
}
-
// Be sure to copy the source modifiers to the right place.
if (MachineOperand *Src0Mods
= getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) {
@@ -832,7 +959,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
Src1.ChangeToRegister(Reg, false);
Src1.setSubReg(SubReg);
} else {
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
}
if (MI)
@@ -845,8 +972,8 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
// between the true commutable operands, and the base
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const {
+ unsigned &SrcOpIdx0,
+ unsigned &SrcOpIdx1) const {
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
@@ -857,7 +984,8 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
return false;
// FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
- // immediate.
+ // immediate. Also, an immediate src0 operand is not handled in
+ // SIInstrInfo::commuteInstruction().
if (!MI->getOperand(Src0Idx).isReg())
return false;
@@ -865,18 +993,22 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI,
if (Src1Idx == -1)
return false;
- if (!MI->getOperand(Src1Idx).isReg())
- return false;
-
- // If any source modifiers are set, the generic instruction commuting won't
- // understand how to copy the source modifiers.
- if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
+ if (Src1.isImm()) {
+ // SIInstrInfo::commuteInstruction() does support commuting the immediate
+ // operand src1 in 2 and 3 operand instructions.
+ if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))
+ return false;
+ } else if (Src1.isReg()) {
+ // If any source modifiers are set, the generic instruction commuting won't
+ // understand how to copy the source modifiers.
+ if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers))
+ return false;
+ } else
return false;
- SrcOpIdx1 = Src0Idx;
- SrcOpIdx2 = Src1Idx;
- return true;
+ return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}
MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB,
@@ -898,11 +1030,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const {
}
}
-bool
-SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
- return RC != &AMDGPU::EXECRegRegClass;
-}
-
static void removeModOperands(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
@@ -984,9 +1111,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
}
- UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
- AMDGPU::OpName::src2));
- // ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
removeModOperands(*UseMI);
@@ -1045,18 +1169,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
return false;
}
-bool
-SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA) const {
- switch(MI->getOpcode()) {
- default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA);
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::V_MOV_B32_e32:
- return MI->getOperand(1).isImm();
- }
-}
-
static bool offsetsDoNotOverlap(int WidthA, int OffsetA,
int WidthB, int OffsetB) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
@@ -1088,9 +1200,6 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
MachineInstr *MIb,
AliasAnalysis *AA) const {
- unsigned Opc0 = MIa->getOpcode();
- unsigned Opc1 = MIb->getOpcode();
-
assert(MIa && (MIa->mayLoad() || MIa->mayStore()) &&
"MIa must load from or modify a memory location");
assert(MIb && (MIb->mayLoad() || MIb->mayStore()) &&
@@ -1105,32 +1214,32 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
// TODO: Should we check the address space from the MachineMemOperand? That
// would allow us to distinguish objects we know don't alias based on the
- // underlying addres space, even if it was lowered to a different one,
+ // underlying address space, even if it was lowered to a different one,
// e.g. private accesses lowered to use MUBUF instructions on a scratch
// buffer.
- if (isDS(Opc0)) {
- if (isDS(Opc1))
+ if (isDS(*MIa)) {
+ if (isDS(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(Opc1);
+ return !isFLAT(*MIb);
}
- if (isMUBUF(Opc0) || isMTBUF(Opc0)) {
- if (isMUBUF(Opc1) || isMTBUF(Opc1))
+ if (isMUBUF(*MIa) || isMTBUF(*MIa)) {
+ if (isMUBUF(*MIb) || isMTBUF(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(Opc1) && !isSMRD(Opc1);
+ return !isFLAT(*MIb) && !isSMRD(*MIb);
}
- if (isSMRD(Opc0)) {
- if (isSMRD(Opc1))
+ if (isSMRD(*MIa)) {
+ if (isSMRD(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
- return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0);
+ return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa);
}
- if (isFLAT(Opc0)) {
- if (isFLAT(Opc1))
+ if (isFLAT(*MIa)) {
+ if (isFLAT(*MIb))
return checkInstOffsetsDoNotOverlap(MIa, MIb);
return false;
@@ -1319,6 +1428,26 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
return false;
}
+static unsigned findImplicitSGPRRead(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.implicit_operands()) {
+ // We only care about reads.
+ if (MO.isDef())
+ continue;
+
+ switch (MO.getReg()) {
+ case AMDGPU::VCC:
+ case AMDGPU::M0:
+ case AMDGPU::FLAT_SCR:
+ return MO.getReg();
+
+ default:
+ break;
+ }
+ }
+
+ return AMDGPU::NoRegister;
+}
+
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI->getOpcode();
@@ -1335,7 +1464,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return false;
}
- // Make sure the register classes are correct
+ // Make sure the register classes are correct.
for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isFPImm()) {
ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
@@ -1392,14 +1521,17 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Verify VOP*
- if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) {
+ if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) {
// Only look at the true operands. Only a real operand can use the constant
// bus, and we don't want to check pseudo-operands like the source modifier
// flags.
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
unsigned ConstantBusCount = 0;
- unsigned SGPRUsed = AMDGPU::NoRegister;
+ unsigned SGPRUsed = findImplicitSGPRRead(*MI);
+ if (SGPRUsed != AMDGPU::NoRegister)
+ ++ConstantBusCount;
+
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
break;
@@ -1435,6 +1567,16 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
}
+ // Make sure we aren't losing exec uses in the td files. This mostly requires
+ // being careful when using let Uses to add other use registers.
+ if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) {
+ const MachineOperand *Exec = MI->findRegisterUseOperand(AMDGPU::EXEC);
+ if (!Exec || !Exec->isImplicit()) {
+ ErrInfo = "VALU instruction does not implicitly read exec mask";
+ return false;
+ }
+ }
+
return true;
}
@@ -1483,11 +1625,17 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
case AMDGPU::S_LOAD_DWORD_IMM:
- case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
+ case AMDGPU::S_LOAD_DWORD_SGPR:
+ case AMDGPU::S_LOAD_DWORD_IMM_ci:
+ return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
case AMDGPU::S_LOAD_DWORDX2_IMM:
- case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX2_SGPR:
+ case AMDGPU::S_LOAD_DWORDX2_IMM_ci:
+ return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX4_SGPR:
+ case AMDGPU::S_LOAD_DWORDX4_IMM_ci:
+ return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
@@ -1562,17 +1710,21 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
unsigned SubIdx,
const TargetRegisterClass *SubRC)
const {
- assert(SuperReg.isReg());
-
- unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
+ MachineBasicBlock *MBB = MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
unsigned SubReg = MRI.createVirtualRegister(SubRC);
+ if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {
+ BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg)
+ .addReg(SuperReg.getReg(), 0, SubIdx);
+ return SubReg;
+ }
+
// Just in case the super register is itself a sub-register, copy it to a new
// value so we don't need to worry about merging its subreg index with the
// SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
- MachineBasicBlock *MBB = MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
+ unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC);
BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg)
.addReg(SuperReg.getReg(), 0, SuperReg.getSubReg());
@@ -1605,36 +1757,6 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
return MachineOperand::CreateReg(SubReg, false);
}
-unsigned SIInstrInfo::split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineOperand &Op) const {
- MachineBasicBlock *MBB = MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
- unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- unsigned Dst = MRI.createVirtualRegister(RC);
-
- MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- LoDst)
- .addImm(Op.getImm() & 0xFFFFFFFF);
- MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32),
- HiDst)
- .addImm(Op.getImm() >> 32);
-
- BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst)
- .addReg(LoDst)
- .addImm(AMDGPU::sub0)
- .addReg(HiDst)
- .addImm(AMDGPU::sub1);
-
- Worklist.push_back(Lo);
- Worklist.push_back(Hi);
-
- return Dst;
-}
-
// Change the order of operands from (0, 1, 2) to (0, 2, 1)
void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
assert(Inst->getNumExplicitOperands() == 3);
@@ -1643,6 +1765,41 @@ void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const {
Inst->addOperand(Op1);
}
+bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const {
+ if (!MO.isReg())
+ return false;
+
+ unsigned Reg = MO.getReg();
+ const TargetRegisterClass *RC =
+ TargetRegisterInfo::isVirtualRegister(Reg) ?
+ MRI.getRegClass(Reg) :
+ RI.getPhysRegClass(Reg);
+
+ // In order to be legal, the common sub-class must be equal to the
+ // class of the current operand. For example:
+ //
+ // v_mov_b32 s0 ; Operand defined as vsrc_32
+ // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+ //
+ // s_sendmsg 0, s0 ; Operand defined as m0reg
+ // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+
+ return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+}
+
+bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const {
+ if (MO.isReg())
+ return isLegalRegOperand(MRI, OpInfo, MO);
+
+ // Handle non-register types that are treated like immediates.
+ assert(MO.isImm() || MO.isTargetIndex() || MO.isFI());
+ return true;
+}
+
bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
const MachineOperand *MO) const {
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -1653,7 +1810,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
if (!MO)
MO = &MI->getOperand(OpIdx);
- if (isVALU(InstDesc.Opcode) &&
+ if (isVALU(*MI) &&
usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
unsigned SGPRUsed =
MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister;
@@ -1670,21 +1827,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
if (MO->isReg()) {
assert(DefinedRC);
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isVirtualRegister(MO->getReg()) ?
- MRI.getRegClass(MO->getReg()) :
- RI.getPhysRegClass(MO->getReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_32
- // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
-
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ return isLegalRegOperand(MRI, OpInfo, *MO);
}
@@ -1699,81 +1842,143 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
return isImmOperandLegal(MI, OpIdx, *MO);
}
-void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI,
+ MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+ const MCInstrDesc &InstrDesc = get(Opc);
- int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src0);
- int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src1);
- int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::src2);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+ MachineOperand &Src1 = MI->getOperand(Src1Idx);
- // Legalize VOP2
- if (isVOP2(MI->getOpcode()) && Src1Idx != -1) {
- // Legalize src0
- if (!isOperandLegal(MI, Src0Idx))
+ // If there is an implicit SGPR use, such as the VCC use for
+ // v_addc_u32/v_subb_u32, we are limited to a single constant bus use.
+ //
+ // Note we do not need to worry about literal constants here. They are
+ // disabled for the operand types of these instructions because they would
+ // always violate the one constant bus use rule.
+ bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister;
+ if (HasImplicitSGPR) {
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+
+ if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg()))
legalizeOpWithMove(MI, Src0Idx);
+ }
- // Legalize src1
- if (isOperandLegal(MI, Src1Idx))
- return;
+ // Src0 of VOP2 instructions accepts all operand types, so we don't need to
+ // check its legality. If src1 is already legal, we don't need to do anything.
+ if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1))
+ return;
- // Usually src0 of VOP2 instructions allow more types of inputs
- // than src1, so try to commute the instruction to decrease our
- // chances of having to insert a MOV instruction to legalize src1.
- if (MI->isCommutable()) {
- if (commuteInstruction(MI))
- // If we are successful in commuting, then we know MI is legal, so
- // we are done.
- return;
- }
+ // We do not use commuteInstruction here because it is too aggressive and will
+ // commute whenever possible. We only want to commute here if it improves
+ // legality. This can be called a fairly large number of times, so don't waste
+ // compile time pointlessly swapping and checking legality again.
+ if (HasImplicitSGPR || !MI->isCommutable()) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
+
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI->getOperand(Src0Idx);
+ // If src0 can be used as src1, commuting will make the operands legal.
+ // Otherwise we have to give up and insert a move.
+ //
+ // TODO: Other immediate-like operand kinds could be commuted if there was a
+ // MachineOperand::ChangeTo* for them.
+ if ((!Src1.isImm() && !Src1.isReg()) ||
+ !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) {
legalizeOpWithMove(MI, Src1Idx);
return;
}
- // XXX - Do any VOP3 instructions read VCC?
- // Legalize VOP3
- if (isVOP3(MI->getOpcode())) {
- int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
+ int CommutedOpc = commuteOpcode(*MI);
+ if (CommutedOpc == -1) {
+ legalizeOpWithMove(MI, Src1Idx);
+ return;
+ }
- // Find the one SGPR operand we are allowed to use.
- unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+ MI->setDesc(get(CommutedOpc));
- for (unsigned i = 0; i < 3; ++i) {
- int Idx = VOP3Idx[i];
- if (Idx == -1)
- break;
- MachineOperand &MO = MI->getOperand(Idx);
+ unsigned Src0Reg = Src0.getReg();
+ unsigned Src0SubReg = Src0.getSubReg();
+ bool Src0Kill = Src0.isKill();
- if (MO.isReg()) {
- if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
- continue; // VGPRs are legal
+ if (Src1.isImm())
+ Src0.ChangeToImmediate(Src1.getImm());
+ else if (Src1.isReg()) {
+ Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill());
+ Src0.setSubReg(Src1.getSubReg());
+ } else
+ llvm_unreachable("Should only have register or immediate operands");
- assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction");
+ Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill);
+ Src1.setSubReg(Src0SubReg);
+}
- if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
- SGPRReg = MO.getReg();
- // We can use one SGPR in each VOP3 instruction.
- continue;
- }
- } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) {
- // If it is not a register and not a literal constant, then it must be
- // an inline constant which is always legal.
- continue;
- }
- // If we make it this far, then the operand is not legal and we must
- // legalize it.
- legalizeOpWithMove(MI, Idx);
+// Legalize VOP3 operands. Because all operand types are supported for any
+// operand, and since literal constants are not allowed and should never be
+// seen, we only need to worry about inserting copies if we use multiple SGPR
+// operands.
+void SIInstrInfo::legalizeOperandsVOP3(
+ MachineRegisterInfo &MRI,
+ MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+
+ int VOP3Idx[3] = {
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
+ };
+
+ // Find the one SGPR operand we are allowed to use.
+ unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+
+ for (unsigned i = 0; i < 3; ++i) {
+ int Idx = VOP3Idx[i];
+ if (Idx == -1)
+ break;
+ MachineOperand &MO = MI->getOperand(Idx);
+
+ // We should never see a VOP3 instruction with an illegal immediate operand.
+ if (!MO.isReg())
+ continue;
+
+ if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ continue; // VGPRs are legal
+
+ if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
+ SGPRReg = MO.getReg();
+ // We can use one SGPR in each VOP3 instruction.
+ continue;
}
+
+ // If we make it this far, then the operand is not legal and we must
+ // legalize it.
+ legalizeOpWithMove(MI, Idx);
+ }
+}
+
+void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ // Legalize VOP2
+ if (isVOP2(*MI)) {
+ legalizeOperandsVOP2(MRI, MI);
+ return;
+ }
+
+ // Legalize VOP3
+ if (isVOP3(*MI)) {
+ legalizeOperandsVOP3(MRI, MI);
+ return;
}
// Legalize REG_SEQUENCE and PHI
// The register class of the operands must be the same type as the register
// class of the output.
- if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
- MI->getOpcode() == AMDGPU::PHI) {
+ if (MI->getOpcode() == AMDGPU::PHI) {
const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
if (!MI->getOperand(i).isReg() ||
@@ -1802,26 +2007,53 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
// Update all the operands so they have the same type.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
- if (!MI->getOperand(i).isReg() ||
- !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
continue;
unsigned DstReg = MRI.createVirtualRegister(RC);
- MachineBasicBlock *InsertBB;
- MachineBasicBlock::iterator Insert;
- if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
- InsertBB = MI->getParent();
- Insert = MI;
- } else {
- // MI is a PHI instruction.
- InsertBB = MI->getOperand(i + 1).getMBB();
- Insert = InsertBB->getFirstTerminator();
+
+ // MI is a PHI instruction.
+ MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB();
+ MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
+
+ BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
+ .addOperand(Op);
+ Op.setReg(DstReg);
+ }
+ }
+
+ // REG_SEQUENCE doesn't really require operand legalization, but if one has a
+ // VGPR dest type and SGPR sources, insert copies so all operands are
+ // VGPRs. This seems to help operand folding / the register coalescer.
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ MachineBasicBlock *MBB = MI->getParent();
+ const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0);
+ if (RI.hasVGPRs(DstRC)) {
+ // Update all the operands so they use VGPR register classes. These may
+ // not all be the same register class because REG_SEQUENCE supports mixing
+ // subregister index types, e.g. sub0_sub1 + sub2 + sub3.
+ for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
+ MachineOperand &Op = MI->getOperand(I);
+ if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
+ continue;
+
+ const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg());
+ const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC);
+ if (VRC == OpRC)
+ continue;
+
+ unsigned DstReg = MRI.createVirtualRegister(VRC);
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg)
+ .addOperand(Op);
+
+ Op.setReg(DstReg);
+ Op.setIsKill();
}
- BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
- get(AMDGPU::COPY), DstReg)
- .addOperand(MI->getOperand(i));
- MI->getOperand(i).setReg(DstReg);
}
+
+ return;
}
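
An illustrative before/after (assumed MIR, not from the patch) of the REG_SEQUENCE rewrite above, showing the mixed subregister indices the comment mentions:

    %vd:vreg_128 = REG_SEQUENCE %sa:sreg_64, sub0_sub1, %sb, sub2, %sc, sub3

becomes, once each SGPR source is copied into its equivalent VGPR class:

    %va:vreg_64 = COPY %sa
    %vb:vgpr_32 = COPY %sb
    %vc:vgpr_32 = COPY %sc
    %vd:vreg_128 = REG_SEQUENCE %va, sub0_sub1, %vb, sub2, %vc, sub3
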
// Legalize INSERT_SUBREG
@@ -1858,15 +2090,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
MachineBasicBlock &MBB = *MI->getParent();
- // Extract the ptr from the resource descriptor.
-
- // SRsrcPtrLo = srsrc:sub0
- unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass);
- // SRsrcPtrHi = srsrc:sub1
- unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc,
- &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass);
+ // Extract the ptr from the resource descriptor.
+ unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc,
+ &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
// Create an empty resource descriptor
unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -1891,80 +2118,112 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
.addImm(RsrcDataFormat >> 32);
// NewSRsrc = {Zero64, SRsrcFormat}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewSRsrc)
- .addReg(Zero64)
- .addImm(AMDGPU::sub0_sub1)
- .addReg(SRsrcFormatLo)
- .addImm(AMDGPU::sub2)
- .addReg(SRsrcFormatHi)
- .addImm(AMDGPU::sub3);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc)
+ .addReg(Zero64)
+ .addImm(AMDGPU::sub0_sub1)
+ .addReg(SRsrcFormatLo)
+ .addImm(AMDGPU::sub2)
+ .addReg(SRsrcFormatHi)
+ .addImm(AMDGPU::sub3);
MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
- unsigned NewVAddrLo;
- unsigned NewVAddrHi;
if (VAddr) {
// This is already an ADDR64 instruction so we need to add the pointer
// extracted from the resource descriptor to the current value of VAddr.
- NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- // NewVaddrLo = SRsrcPtrLo + VAddr:sub0
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32),
- NewVAddrLo)
- .addReg(SRsrcPtrLo)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub0)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine);
-
- // NewVaddrHi = SRsrcPtrHi + VAddr:sub1
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32),
- NewVAddrHi)
- .addReg(SRsrcPtrHi)
- .addReg(VAddr->getReg(), 0, AMDGPU::sub1)
- .addReg(AMDGPU::VCC, RegState::ImplicitDefine)
- .addReg(AMDGPU::VCC, RegState::Implicit);
-
+ unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub0);
+
+ // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addReg(VAddr->getReg(), 0, AMDGPU::sub1);
+
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+ .addReg(NewVAddrLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(NewVAddrHi)
+ .addImm(AMDGPU::sub1);
} else {
// This instruction is the _OFFSET variant, so we need to convert it to
// ADDR64.
+ assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration()
+ < AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ "FIXME: Need to emit flat atomics here");
+
MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata);
MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset);
MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset);
-
- // Create the new instruction.
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode());
- MachineInstr *Addr64 =
- BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
- .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
- // This will be replaced later
- // with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0); // tfe
+
+ // Atomics with return have an additional tied operand and are
+ // missing some of the special bits.
+ MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in);
+ MachineInstr *Addr64;
+
+ if (!VDataIn) {
+ // Regular buffer load / store.
+ MachineInstrBuilder MIB
+ = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset);
+
+ // Atomics do not have this operand.
+ if (const MachineOperand *GLC
+ = getNamedOperand(*MI, AMDGPU::OpName::glc)) {
+ MIB.addImm(GLC->getImm());
+ }
+
+ MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc));
+
+ if (const MachineOperand *TFE
+ = getNamedOperand(*MI, AMDGPU::OpName::tfe)) {
+ MIB.addImm(TFE->getImm());
+ }
+
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ Addr64 = MIB;
+ } else {
+ // Atomics with return.
+ Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode))
+ .addOperand(*VData)
+ .addOperand(*VDataIn)
+ .addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
+ // This will be replaced later
+ // with the new value of vaddr.
+ .addOperand(*SRsrc)
+ .addOperand(*SOffset)
+ .addOperand(*Offset)
+ .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ }
MI->removeFromParent();
MI = Addr64;
- NewVAddrLo = SRsrcPtrLo;
- NewVAddrHi = SRsrcPtrHi;
+ // NewVaddr = {NewVaddrHi, NewVaddrLo}
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(SRsrcPtr, 0, AMDGPU::sub1)
+ .addImm(AMDGPU::sub1);
+
VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr);
SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc);
}
- // NewVaddr = {NewVaddrHi, NewVaddrLo}
- BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
- NewVAddr)
- .addReg(NewVAddrLo)
- .addImm(AMDGPU::sub0)
- .addReg(NewVAddrHi)
- .addImm(AMDGPU::sub1);
-
-
// Update the instruction to use NewVaddr
VAddr->setReg(NewVAddr);
// Update the instruction to use NewSRsrc
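
A rough sketch of the overall MUBUF legalization performed in this block (assumed MIR and operand order, not from the patch): when the resource descriptor has ended up in VGPRs, the pointer is extracted from srsrc:sub0_sub1, the descriptor is rebuilt with a zero base and the default data format, and an _OFFSET form such as

    BUFFER_LOAD_DWORD_OFFSET %vdst, %vrsrc, %soffset, imm_offset

is converted to the matching _ADDR64 form, with the extracted pointer (added to any existing vaddr) supplying the 64-bit address:

    BUFFER_LOAD_DWORD_ADDR64 %vdst, %newvaddr, %newsrsrc, %soffset, imm_offset
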
@@ -2028,53 +2287,64 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI,
.addOperand(*SOff);
unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR)
- .addOperand(*SOff)
- .addImm(HalfSize);
- Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp))
+ .addReg(SOff->getReg(), 0, SOff->getSubReg())
+ .addImm(HalfSize);
+ Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi)
.addReg(SBase->getReg(), getKillRegState(IsKill),
SBase->getSubReg())
.addReg(OffsetSGPR);
}
unsigned SubLo, SubHi;
+ const TargetRegisterClass *NewDstRC;
switch (HalfSize) {
case 4:
SubLo = AMDGPU::sub0;
SubHi = AMDGPU::sub1;
+ NewDstRC = &AMDGPU::VReg_64RegClass;
break;
case 8:
SubLo = AMDGPU::sub0_sub1;
SubHi = AMDGPU::sub2_sub3;
+ NewDstRC = &AMDGPU::VReg_128RegClass;
break;
case 16:
SubLo = AMDGPU::sub0_sub1_sub2_sub3;
SubHi = AMDGPU::sub4_sub5_sub6_sub7;
+ NewDstRC = &AMDGPU::VReg_256RegClass;
break;
case 32:
SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15;
+ NewDstRC = &AMDGPU::VReg_512RegClass;
break;
default:
llvm_unreachable("Unhandled HalfSize");
}
- BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE))
- .addOperand(MI->getOperand(0))
- .addReg(RegLo)
- .addImm(SubLo)
- .addReg(RegHi)
- .addImm(SubHi);
+ unsigned OldDst = MI->getOperand(0).getReg();
+ unsigned NewDst = MRI.createVirtualRegister(NewDstRC);
+
+ MRI.replaceRegWith(OldDst, NewDst);
+
+ BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDst)
+ .addReg(RegLo)
+ .addImm(SubLo)
+ .addReg(RegHi)
+ .addImm(SubHi);
}
-void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
+void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI,
+ MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const {
MachineBasicBlock *MBB = MI->getParent();
- switch (MI->getOpcode()) {
- case AMDGPU::S_LOAD_DWORD_IMM:
- case AMDGPU::S_LOAD_DWORD_SGPR:
- case AMDGPU::S_LOAD_DWORDX2_IMM:
- case AMDGPU::S_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_LOAD_DWORDX4_IMM:
- case AMDGPU::S_LOAD_DWORDX4_SGPR: {
+ int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ assert(DstIdx != -1);
+ unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass;
+ switch(RI.getRegClass(DstRCID)->getSize()) {
+ case 4:
+ case 8:
+ case 16: {
unsigned NewOpcode = getVALUOp(*MI);
unsigned RegOffset;
unsigned ImmOffset;
@@ -2118,53 +2388,55 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
.addImm(RsrcDataFormat >> 32);
BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
- .addReg(DWord0)
- .addImm(AMDGPU::sub0)
- .addReg(DWord1)
- .addImm(AMDGPU::sub1)
- .addReg(DWord2)
- .addImm(AMDGPU::sub2)
- .addReg(DWord3)
- .addImm(AMDGPU::sub3);
- MI->setDesc(get(NewOpcode));
- if (MI->getOperand(2).isReg()) {
- MI->getOperand(2).setReg(SRsrc);
- } else {
- MI->getOperand(2).ChangeToRegister(SRsrc, false);
- }
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0));
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc
- MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe
-
- const TargetRegisterClass *NewDstRC =
- RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass);
-
- unsigned DstReg = MI->getOperand(0).getReg();
+ .addReg(DWord0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DWord1)
+ .addImm(AMDGPU::sub1)
+ .addReg(DWord2)
+ .addImm(AMDGPU::sub2)
+ .addReg(DWord3)
+ .addImm(AMDGPU::sub3);
+
+ const MCInstrDesc &NewInstDesc = get(NewOpcode);
+ const TargetRegisterClass *NewDstRC
+ = RI.getRegClass(NewInstDesc.OpInfo[0].RegClass);
unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ unsigned DstReg = MI->getOperand(0).getReg();
MRI.replaceRegWith(DstReg, NewDstReg);
+
+ MachineInstr *NewInst =
+ BuildMI(*MBB, MI, MI->getDebugLoc(), NewInstDesc, NewDstReg)
+ .addOperand(MI->getOperand(1)) // sbase
+ .addReg(SRsrc)
+ .addImm(0)
+ .addImm(ImmOffset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MI->eraseFromParent();
+
+ legalizeOperands(NewInst);
+ addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
break;
}
- case AMDGPU::S_LOAD_DWORDX8_IMM:
- case AMDGPU::S_LOAD_DWORDX8_SGPR: {
+ case 32: {
MachineInstr *Lo, *Hi;
splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM,
AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi);
MI->eraseFromParent();
- moveSMRDToVALU(Lo, MRI);
- moveSMRDToVALU(Hi, MRI);
+ moveSMRDToVALU(Lo, MRI, Worklist);
+ moveSMRDToVALU(Hi, MRI, Worklist);
break;
}
- case AMDGPU::S_LOAD_DWORDX16_IMM:
- case AMDGPU::S_LOAD_DWORDX16_SGPR: {
+ case 64: {
MachineInstr *Lo, *Hi;
splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM,
AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi);
MI->eraseFromParent();
- moveSMRDToVALU(Lo, MRI);
- moveSMRDToVALU(Hi, MRI);
+ moveSMRDToVALU(Lo, MRI, Worklist);
+ moveSMRDToVALU(Hi, MRI, Worklist);
break;
}
}
@@ -2185,51 +2457,28 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// Handle some special cases
switch (Opcode) {
default:
- if (isSMRD(Inst->getOpcode())) {
- moveSMRDToVALU(Inst, MRI);
+ if (isSMRD(*Inst)) {
+ moveSMRDToVALU(Inst, MRI, Worklist);
+ continue;
}
break;
- case AMDGPU::S_MOV_B64: {
- DebugLoc DL = Inst->getDebugLoc();
-
- // If the source operand is a register we can replace this with a
- // copy.
- if (Inst->getOperand(1).isReg()) {
- MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY))
- .addOperand(Inst->getOperand(0))
- .addOperand(Inst->getOperand(1));
- Worklist.push_back(Copy);
- } else {
- // Otherwise, we need to split this into two movs, because there is
- // no 64-bit VALU move instruction.
- unsigned Reg = Inst->getOperand(0).getReg();
- unsigned Dst = split64BitImm(Worklist,
- Inst,
- MRI,
- MRI.getRegClass(Reg),
- Inst->getOperand(1));
- MRI.replaceRegWith(Reg, Dst);
- }
- Inst->eraseFromParent();
- continue;
- }
case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64);
Inst->eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32);
Inst->eraseFromParent();
continue;
@@ -2281,6 +2530,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
break;
+ case AMDGPU::S_ABS_I32:
+ lowerScalarAbs(Worklist, Inst);
+ Inst->eraseFromParent();
+ continue;
+
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
@@ -2319,7 +2573,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst->addOperand(MachineOperand::CreateImm(0));
}
- addDescImplicitUseDef(NewDesc, Inst);
+ Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent());
if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
@@ -2337,27 +2591,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
}
// Update the destination register class.
-
- const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
-
- switch (Opcode) {
- // For target instructions, getOpRegClass just returns the virtual
- // register class associated with the operand, so we need to find an
- // equivalent VGPR register class in order to move the instruction to the
- // VALU.
- case AMDGPU::COPY:
- case AMDGPU::PHI:
- case AMDGPU::REG_SEQUENCE:
- case AMDGPU::INSERT_SUBREG:
- if (RI.hasVGPRs(NewDstRC))
- continue;
- NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
- if (!NewDstRC)
- continue;
- break;
- default:
- break;
- }
+ const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst);
+ if (!NewDstRC)
+ continue;
unsigned DstReg = Inst->getOperand(0).getReg();
unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
@@ -2366,13 +2602,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// Legalize the operands
legalizeOperands(Inst);
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
- E = MRI.use_end(); I != E; ++I) {
- MachineInstr &UseMI = *I->getParent();
- if (!canReadVGPR(UseMI, I.getOperandNo())) {
- Worklist.push_back(&UseMI);
- }
- }
+ addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
}
}
@@ -2390,6 +2620,30 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::VGPR_32RegClass;
}
+void SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg)
+ .addImm(0)
+ .addReg(Src.getReg());
+
+ BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg)
+ .addReg(Src.getReg())
+ .addReg(TmpReg);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+}
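
A minimal scalar model of the expansion above (illustrative C++; the function name is made up and not part of the patch):

    #include <cstdint>

    // S_ABS_I32 lowered as V_SUB_I32_e32 followed by V_MAX_I32_e64.
    static int32_t scalarAbsModel(int32_t Src) {
      int32_t Tmp = 0 - Src;          // TmpReg    = 0 - Src
      return Src > Tmp ? Src : Tmp;   // ResultReg = max(Src, TmpReg)
    }
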
+
void SIInstrInfo::splitScalar64BitUnaryOp(
SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst,
@@ -2414,20 +2668,21 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
AMDGPU::sub0, Src0SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+ const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
- MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.addOperand(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
- MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.addOperand(SrcReg0Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -2436,10 +2691,11 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
MRI.replaceRegWith(Dest.getReg(), FullDestReg);
- // Try to legalize the operands in case we need to swap the order to keep it
- // valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
+ // We don't need to call legalizeOperands here because for a single operand,
+ // src0 will support any kind of input.
+
+ // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
void SIInstrInfo::splitScalar64BitBinaryOp(
@@ -2474,9 +2730,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
AMDGPU::sub0, Src1SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
- const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
+ const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
- unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
.addOperand(SrcReg0Sub0)
.addOperand(SrcReg1Sub0);
@@ -2486,12 +2743,12 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC,
AMDGPU::sub1, Src1SubRC);
- unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
.addOperand(SrcReg0Sub1)
.addOperand(SrcReg1Sub1);
- unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
@@ -2502,8 +2759,11 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
// Try to legalize the operands in case we need to swap the order to keep it
// valid.
- Worklist.push_back(LoHalf);
- Worklist.push_back(HiHalf);
+ legalizeOperands(LoHalf);
+ legalizeOperands(HiHalf);
+
+ // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
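
For the bitwise cases fed into this from moveToVALU (S_AND_B64, S_OR_B64, S_XOR_B64), the split is valid because these operations act on the two 32-bit halves independently. A minimal C++ model (illustrative only; the name is made up):

    #include <cstdint>

    // Models the AND case: operate on sub0 and sub1 separately, then
    // recombine the halves as the REG_SEQUENCE above does.
    static uint64_t splitAnd64Model(uint64_t A, uint64_t B) {
      uint32_t Lo = uint32_t(A) & uint32_t(B);              // LoHalf
      uint32_t Hi = uint32_t(A >> 32) & uint32_t(B >> 32);  // HiHalf
      return (uint64_t(Hi) << 32) | Lo;                     // FullDestReg
    }
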
void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2532,18 +2792,19 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist
MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
AMDGPU::sub1, SrcSubRC);
- MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+ BuildMI(MBB, MII, DL, InstDesc, MidReg)
.addOperand(SrcRegSub0)
.addImm(0);
- MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+ BuildMI(MBB, MII, DL, InstDesc, ResultReg)
.addOperand(SrcRegSub1)
.addReg(MidReg);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
- Worklist.push_back(First);
- Worklist.push_back(Second);
+ // We don't need to legalize operands here. src0 for either instruction can be
+ // an SGPR, and the second input is unused or determined here.
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
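
The 64-bit population count splits the same way: V_BCNT_U32_B32 computes popcount(src0) + src1, so chaining MidReg into the second instruction sums the two halves. A minimal C++ model (illustrative only; the name is made up and a compiler providing __builtin_popcount is assumed):

    #include <cstdint>

    static uint32_t bcnt64Model(uint64_t V) {
      uint32_t Mid = __builtin_popcount(uint32_t(V));        // First  (+ 0)
      return __builtin_popcount(uint32_t(V >> 32)) + Mid;    // Second (+ Mid)
    }
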
void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
@@ -2587,6 +2848,7 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
.addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
return;
}
@@ -2605,33 +2867,53 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
.addImm(AMDGPU::sub1);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
- MachineInstr *Inst) const {
- // Add the implict and explicit register definitions.
- if (NewDesc.ImplicitUses) {
- for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitUses[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
+void SIInstrInfo::addUsersToMoveToVALUWorklist(
+ unsigned DstReg,
+ MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
+ E = MRI.use_end(); I != E; ++I) {
+ MachineInstr &UseMI = *I->getParent();
+ if (!canReadVGPR(UseMI, I.getOperandNo())) {
+ Worklist.push_back(&UseMI);
}
}
+}
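
A hedged sketch of the worklist pattern this helper feeds (the driver loop lives in moveToVALU, which is only partially visible in the hunks above; the names mirror the surrounding code):

    // After an instruction is replaced by its VALU form and its result is
    // rewritten to NewDstReg, every user that cannot read a VGPR is queued,
    // so the SALU-to-VALU conversion propagates through the use chain.
    SmallVector<MachineInstr *, 16> Worklist;
    Worklist.push_back(&TopInst);
    while (!Worklist.empty()) {
      MachineInstr *Inst = Worklist.pop_back_val();
      // ... rewrite Inst to a VALU equivalent producing NewDstReg ...
      addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
    }
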
- if (NewDesc.ImplicitDefs) {
- for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitDefs[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
- }
+const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
+ const MachineInstr &Inst) const {
+ const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
+
+ switch (Inst.getOpcode()) {
+ // For target instructions, getOpRegClass just returns the virtual register
+ // class associated with the operand, so we need to find an equivalent VGPR
+ // register class in order to move the instruction to the VALU.
+ case AMDGPU::COPY:
+ case AMDGPU::PHI:
+ case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::INSERT_SUBREG:
+ if (RI.hasVGPRs(NewDstRC))
+ return nullptr;
+
+ NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
+ if (!NewDstRC)
+ return nullptr;
+ return NewDstRC;
+ default:
+ return NewDstRC;
}
}
+// Find the one SGPR operand we are allowed to use.
unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
int OpIndices[3]) const {
- const MCInstrDesc &Desc = get(MI->getOpcode());
+ const MCInstrDesc &Desc = MI->getDesc();
// Find the one SGPR operand we are allowed to use.
- unsigned SGPRReg = AMDGPU::NoRegister;
-
+ //
// First we need to consider the instruction's operand requirements before
// legalizing. Some operands are required to be SGPRs, such as implicit uses
// of VCC, but we are still bound by the constant bus requirement to only use
@@ -2639,17 +2921,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
//
// If the operand's class is an SGPR, we can never move it.
- for (const MachineOperand &MO : MI->implicit_operands()) {
- // We only care about reads.
- if (MO.isDef())
- continue;
-
- if (MO.getReg() == AMDGPU::VCC)
- return AMDGPU::VCC;
-
- if (MO.getReg() == AMDGPU::FLAT_SCR)
- return AMDGPU::FLAT_SCR;
- }
+ unsigned SGPRReg = findImplicitSGPRRead(*MI);
+ if (SGPRReg != AMDGPU::NoRegister)
+ return SGPRReg;
unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
@@ -2660,15 +2934,22 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
break;
const MachineOperand &MO = MI->getOperand(Idx);
- if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
- SGPRReg = MO.getReg();
+ if (!MO.isReg())
+ continue;
- if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
- UsedSGPRs[i] = MO.getReg();
- }
+ // Is this operand statically required to be an SGPR based on the operand
+ // constraints?
+ const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass);
+ bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
+ if (IsRequiredSGPR)
+ return MO.getReg();
- if (SGPRReg != AMDGPU::NoRegister)
- return SGPRReg;
+ // If this could be a VGPR or an SGPR, check the dynamic register class.
+ unsigned Reg = MO.getReg();
+ const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
+ if (RI.isSGPRClass(RegRC))
+ UsedSGPRs[i] = Reg;
+ }
// We don't have a required SGPR operand, so we have a bit more freedom in
// selecting operands to move.
@@ -2680,6 +2961,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI,
// V_FMA_F32 v0, s0, s0, s0 -> No moves
// V_FMA_F32 v0, s0, s1, s0 -> Move s1
+ // TODO: If some of the operands are 64-bit SGPRs and some 32, we should
+ // prefer those.
+
if (UsedSGPRs[0] != AMDGPU::NoRegister) {
if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
SGPRReg = UsedSGPRs[0];
@@ -2720,7 +3004,7 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead(
unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister(
getIndirectIndexBegin(*MBB->getParent()));
- return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC))
+ return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC_V1))
.addOperand(I->getOperand(0))
.addOperand(I->getOperand(1))
.addReg(IndirectBaseReg)
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 5053786..307ef67 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -39,14 +39,11 @@ private:
unsigned SubIdx,
const TargetRegisterClass *SubRC) const;
- unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineBasicBlock::iterator MI,
- MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineOperand &Op) const;
-
void swapOperands(MachineBasicBlock::iterator Inst) const;
+ void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
+
void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst, unsigned Opcode) const;
@@ -58,13 +55,24 @@ private:
void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr *Inst) const;
- void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
+ void addUsersToMoveToVALUWorklist(
+ unsigned Reg, MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const;
+
+ const TargetRegisterClass *
+ getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa,
MachineInstr *MIb) const;
unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const;
+protected:
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx0,
+ unsigned OpIdx1) const override;
+
public:
explicit SIInstrInfo(const AMDGPUSubtarget &st);
@@ -117,17 +125,14 @@ public:
// register. If there is no hardware instruction that can store to \p
// DstRC, then AMDGPU::COPY is returned.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
+
+ LLVM_READONLY
int commuteOpcode(const MachineInstr &MI) const;
- MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI = false) const override;
bool findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
- bool isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA = nullptr) const;
-
bool areMemAccessesTriviallyDisjoint(
MachineInstr *MIa, MachineInstr *MIb,
AliasAnalysis *AA = nullptr) const override;
@@ -137,8 +142,6 @@ public:
unsigned DstReg, unsigned SrcReg) const override;
bool isMov(unsigned Opcode) const override;
- bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
-
bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const final;
@@ -148,78 +151,154 @@ public:
MachineBasicBlock::iterator &MI,
LiveVariables *LV) const override;
+ static bool isSALU(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SALU;
+ }
+
bool isSALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
+ static bool isVALU(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VALU;
+ }
+
bool isVALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VALU;
}
+ static bool isSOP1(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
+ }
+
bool isSOP1(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOP1;
}
+ static bool isSOP2(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
+ }
+
bool isSOP2(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOP2;
}
+ static bool isSOPC(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
+ }
+
bool isSOPC(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOPC;
}
+ static bool isSOPK(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
+ }
+
bool isSOPK(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOPK;
}
+ static bool isSOPP(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
+ }
+
bool isSOPP(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SOPP;
}
+ static bool isVOP1(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
+ }
+
bool isVOP1(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP1;
}
+ static bool isVOP2(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
+ }
+
bool isVOP2(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP2;
}
+ static bool isVOP3(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
+ }
+
bool isVOP3(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}
+ static bool isVOPC(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
+ }
+
bool isVOPC(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VOPC;
}
+ static bool isMUBUF(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
+ }
+
bool isMUBUF(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
}
+ static bool isMTBUF(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
+ }
+
bool isMTBUF(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
}
+ static bool isSMRD(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
+ }
+
bool isSMRD(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SMRD;
}
+ static bool isDS(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::DS;
+ }
+
bool isDS(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::DS;
}
+ static bool isMIMG(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
+ }
+
bool isMIMG(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::MIMG;
}
+ static bool isFLAT(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
+ }
+
bool isFLAT(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
+ static bool isWQM(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::WQM;
+ }
+
bool isWQM(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::WQM;
}
+ static bool isVGPRSpill(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
+ }
+
bool isVGPRSpill(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
}
@@ -302,6 +381,26 @@ public:
bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
const MachineOperand *MO = nullptr) const;
+ /// \brief Check if \p MO would be a valid operand for the given operand
+ /// definition \p OpInfo. Note this does not attempt to validate constant bus
+ /// restrictions (e.g. literal constant usage).
+ bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const;
+
+ /// \brief Check if \p MO (a register operand) is a legal register for the
+ /// given operand description.
+ bool isLegalRegOperand(const MachineRegisterInfo &MRI,
+ const MCOperandInfo &OpInfo,
+ const MachineOperand &MO) const;
+
+ /// \brief Legalize operands in \p MI by either commuting it or inserting a
+ /// copy of src1.
+ void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr *MI) const;
+
+ /// \brief Fix operands in \p MI to satisfy constant bus requirements.
+ void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr *MI) const;
+
/// \brief Legalize all operands in this instruction. This function may
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr *MI) const;
@@ -312,7 +411,8 @@ public:
unsigned HalfImmOp, unsigned HalfSGPROp,
MachineInstr *&Lo, MachineInstr *&Hi) const;
- void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
+ void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI,
+ SmallVectorImpl<MachineInstr *> &Worklist) const;
/// \brief Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
@@ -341,29 +441,49 @@ public:
void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
unsigned SavReg, unsigned IndexReg) const;
- void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
+ void insertWaitStates(MachineBasicBlock::iterator MI, int Count) const;
/// \brief Returns the operand named \p Op. If \p MI does not have an
/// operand named \c Op, this function returns nullptr.
+ LLVM_READONLY
MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
+ LLVM_READONLY
const MachineOperand *getNamedOperand(const MachineInstr &MI,
unsigned OpName) const {
return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
}
+ /// Get required immediate operand
+ int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
+ int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
+ return MI.getOperand(Idx).getImm();
+ }
+
uint64_t getDefaultRsrcDataFormat() const;
uint64_t getScratchRsrcWords23() const;
};
namespace AMDGPU {
-
+ LLVM_READONLY
int getVOPe64(uint16_t Opcode);
+
+ LLVM_READONLY
int getVOPe32(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteRev(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteOrig(uint16_t Opcode);
+
+ LLVM_READONLY
int getAddr64Inst(uint16_t Opcode);
+
+ LLVM_READONLY
int getAtomicRetOp(uint16_t Opcode);
+
+ LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8d8110b..10f2adde 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
def isCI : Predicate<"Subtarget->getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isVI : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
- AssemblerPredicate<"FeatureGCN3Encoding">;
+def isCIOnly : Predicate<"Subtarget->getGeneration() =="
+ "AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate <"FeatureSeaIslands">;
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
@@ -69,6 +69,15 @@ class sopk <bits<5> si, bits<5> vi = si> {
field bits<5> VI = vi;
}
+// Specify an SMRD opcode for SI and SMEM opcode for VI
+
+// FIXME: This should really be bits<5> si, but TableGen crashes if a
+// parameter's default value is another parameter with a different bit size
+class smrd<bits<8> si, bits<8> vi = si> {
+ field bits<5> SI = si{4-0};
+ field bits<8> VI = vi;
+}
+
// Except for the NONE field, this must be kept in sync with the SISubtarget enum
// in AMDGPUInstrInfo.cpp
def SISubtarget {
@@ -121,9 +130,20 @@ def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
def SIconstdata_ptr : SDNode<
- "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]>
+ "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, i64>,
+ SDTCisVT<0, i64>]>
>;
+def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(cast<LoadSDNode>(N)) ||
+ isConstantLoad(cast<LoadSDNode>(N), -1);
+}]>;
+
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(cast<LoadSDNode>(N), -1) &&
+ static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
+}]>;
+
//===----------------------------------------------------------------------===//
// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
// to be glued to the memory instructions.
@@ -328,9 +348,9 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
U != E; ++U) {
- if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) {
+ const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
+ if (RC && SIRI->isSGPRClass(RC))
return true;
- }
}
return false;
}]>;
@@ -354,6 +374,8 @@ def sopp_brtarget : Operand<OtherVT> {
let ParserMatchClass = SoppBrTarget;
}
+def const_ga : Operand<iPTR>;
+
include "SIInstrFormats.td"
include "VIInstrFormats.td"
@@ -393,7 +415,7 @@ def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
class GLCBaseMatchClass <string parser> : AsmOperandClass {
let Name = "GLC"#parser;
let PredicateMethod = "isImm";
- let ParserMethod = parser;
+ let ParserMethod = parser;
let RenderMethod = "addImmOperands";
}
@@ -436,6 +458,17 @@ def ClampMatchClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+class SMRDOffsetBaseMatchClass <string predicate> : AsmOperandClass {
+ let Name = "SMRDOffset"#predicate;
+ let PredicateMethod = predicate;
+ let RenderMethod = "addImmOperands";
+}
+
+def SMRDOffsetMatchClass : SMRDOffsetBaseMatchClass <"isSMRDOffset">;
+def SMRDLiteralOffsetMatchClass : SMRDOffsetBaseMatchClass <
+ "isSMRDLiteralOffset"
+>;
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : Operand<i1> {
@@ -510,6 +543,16 @@ def ClampMod : Operand <i1> {
let ParserMatchClass = ClampMatchClass;
}
+def smrd_offset : Operand <i32> {
+ let PrintMethod = "printU32ImmOperand";
+ let ParserMatchClass = SMRDOffsetMatchClass;
+}
+
+def smrd_literal_offset : Operand <i32> {
+ let PrintMethod = "printU32ImmOperand";
+ let ParserMatchClass = SMRDLiteralOffsetMatchClass;
+}
+
} // End OperandType = "OPERAND_IMMEDIATE"
def VOPDstS64 : VOPDstOperand <SReg_64>;
@@ -528,6 +571,13 @@ def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
+
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
@@ -717,19 +767,6 @@ class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
let AssemblerPredicates = [isVI];
}
-multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
- def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc), pattern>;
-
- def _si : SOP2_Real_si <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]">;
-
- def _vi : SOP2_Real_vi <op, opName, (outs SReg_32:$dst),
- (ins SSrc_32:$src0, SSrc_32:$src1, SCCReg:$scc),
- opName#" $dst, $src0, $src1 [$scc]">;
-}
-
multiclass SOP2_m <sop2 op, string opName, dag outs, dag ins, string asm,
list<dag> pattern> {
@@ -758,8 +795,10 @@ multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
string opName, PatLeaf cond> : SOPC <
- op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
- opName#" $src0, $src1", []>;
+ op, (outs), (ins rc:$src0, rc:$src1),
+ opName#" $src0, $src1", []> {
+ let Defs = [SCC];
+}
class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
: SOPC_Helper<op, SSrc_32, i32, opName, cond>;
@@ -812,15 +851,20 @@ multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
}
multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
- def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), pattern>;
+ def "" : SOPK_Pseudo <opName, (outs),
+ (ins SReg_32:$src0, u16imm:$src1), pattern> {
+ let Defs = [SCC];
+ }
+
- let DisableEncoding = "$dst" in {
- def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ def _si : SOPK_Real_si <op, opName, (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16"> {
+ let Defs = [SCC];
+ }
- def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ def _vi : SOPK_Real_vi <op, opName, (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16"> {
+ let Defs = [SCC];
}
}
@@ -868,35 +912,68 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
}
class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
- string asm> :
- SMRD <outs, ins, asm, []>,
+ string asm, list<dag> pattern = []> :
+ SMRD <outs, ins, asm, pattern>,
SMEMe_vi <op, imm>,
SIMCInstr<opName, SISubtarget.VI> {
let AssemblerPredicates = [isVI];
}
-multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
+multiclass SMRD_m <smrd op, string opName, bit imm, dag outs, dag ins,
string asm, list<dag> pattern> {
def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
- def _si : SMRD_Real_si <op, opName, imm, outs, ins, asm>;
+ def _si : SMRD_Real_si <op.SI, opName, imm, outs, ins, asm>;
// glc is only applicable to scalar stores, which are not yet
// implemented.
let glc = 0 in {
- def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>;
+ def _vi : SMRD_Real_vi <op.VI, opName, imm, outs, ins, asm>;
}
}
-multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
+multiclass SMRD_Inval <smrd op, string opName,
+ SDPatternOperator node> {
+ let hasSideEffects = 1, mayStore = 1 in {
+ def "" : SMRD_Pseudo <opName, (outs), (ins), [(node)]>;
+
+ let sbase = 0, offset = 0 in {
+ let sdst = 0 in {
+ def _si : SMRD_Real_si <op.SI, opName, 0, (outs), (ins), opName>;
+ }
+
+ let glc = 0, sdata = 0 in {
+ def _vi : SMRD_Real_vi <op.VI, opName, 0, (outs), (ins), opName>;
+ }
+ }
+ }
+}
+
+class SMEM_Inval <bits<8> op, string opName, SDPatternOperator node> :
+ SMRD_Real_vi<op, opName, 0, (outs), (ins), opName, [(node)]> {
+ let hasSideEffects = 1;
+ let mayStore = 1;
+ let sbase = 0;
+ let sdata = 0;
+ let glc = 0;
+ let offset = 0;
+}
+
+multiclass SMRD_Helper <smrd op, string opName, RegisterClass baseClass,
RegisterClass dstClass> {
defm _IMM : SMRD_m <
op, opName#"_IMM", 1, (outs dstClass:$dst),
- (ins baseClass:$sbase, u32imm:$offset),
+ (ins baseClass:$sbase, smrd_offset:$offset),
opName#" $dst, $sbase, $offset", []
>;
+ def _IMM_ci : SMRD <
+ (outs dstClass:$dst), (ins baseClass:$sbase, smrd_literal_offset:$offset),
+ opName#" $dst, $sbase, $offset", []>, SMRD_IMMe_ci <op.SI> {
+ let AssemblerPredicates = [isCIOnly];
+ }
+
defm _SGPR : SMRD_m <
op, opName#"_SGPR", 0, (outs dstClass:$dst),
(ins baseClass:$sbase, SReg_32:$soff),
@@ -922,11 +999,12 @@ def InputModsNoDefault : Operand <i32> {
let ParserMatchClass = InputModsMatchClass;
}
-class getNumSrcArgs<ValueType Src1, ValueType Src2> {
+class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
int ret =
- !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
+ !if (!eq(Src0.Value, untyped.Value), 0,
+ !if (!eq(Src1.Value, untyped.Value), 1, // VOP1
!if (!eq(Src2.Value, untyped.Value), 2, // VOP2
- 3)); // VOP3
+ 3))); // VOP3
}
// Returns the register class to use for the destination of VOP[123C]
@@ -934,28 +1012,37 @@ class getNumSrcArgs<ValueType Src1, ValueType Src2> {
class getVALUDstForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
!if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
- VOPDstOperand<SReg_64>)); // else VT == i1
+ !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
+ VOPDstOperand<SReg_64>))); // else VT == i1
}
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64);
+ RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
}
// Returns the register class to use for source 1 of VOP[12C] for the
// given VT.
class getVOPSrc1ForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64);
+ RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
}
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64);
+ RegisterOperand ret =
+ !if(!eq(VT.Size, 64),
+ VCSrc_64,
+ !if(!eq(VT.Value, i1.Value),
+ SCSrc_64,
+ VCSrc_32
+ )
+ );
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
+// XXX - do f16 instructions?
class hasModifiers<ValueType SrcVT> {
bit ret = !if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1, 0));
@@ -1009,17 +1096,20 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
// Returns the assembly string for the inputs and outputs of a VOP[12C]
// instruction. This does not add the _e32 suffix, so it can be reused
// by getAsm64.
-class getAsm32 <int NumSrcArgs> {
+class getAsm32 <bit HasDst, int NumSrcArgs> {
+ string dst = "$dst";
+ string src0 = ", $src0";
string src1 = ", $src1";
string src2 = ", $src2";
- string ret = "$dst, $src0"#
- !if(!eq(NumSrcArgs, 1), "", src1)#
- !if(!eq(NumSrcArgs, 3), src2, "");
+ string ret = !if(HasDst, dst, "") #
+ !if(!eq(NumSrcArgs, 1), src0, "") #
+ !if(!eq(NumSrcArgs, 2), src0#src1, "") #
+ !if(!eq(NumSrcArgs, 3), src0#src1#src2, "");
}
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
-class getAsm64 <int NumSrcArgs, bit HasModifiers> {
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers> {
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
!if(!eq(NumSrcArgs, 2), " $src1_modifiers",
@@ -1027,11 +1117,10 @@ class getAsm64 <int NumSrcArgs, bit HasModifiers> {
string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", "");
string ret =
!if(!eq(HasModifiers, 0),
- getAsm32<NumSrcArgs>.ret,
+ getAsm32<HasDst, NumSrcArgs>.ret,
"$dst, "#src0#src1#src2#"$clamp"#"$omod");
}
-
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
@@ -1047,29 +1136,38 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret;
field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret;
- field int NumSrcArgs = getNumSrcArgs<Src1VT, Src2VT>.ret;
+ field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
+ field bit HasDst32 = HasDst;
+ field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
field bit HasModifiers = hasModifiers<Src0VT>.ret;
- field dag Outs = (outs DstRC:$dst);
+ field dag Outs = !if(HasDst,(outs DstRC:$dst),(outs));
+
+ // VOP3b instructions are a special case with a second explicit
+ // output. This is manually overridden for them.
+ field dag Outs32 = Outs;
+ field dag Outs64 = Outs;
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
HasModifiers>.ret;
- field string Asm32 = getAsm32<NumSrcArgs>.ret;
- field string Asm64 = getAsm64<NumSrcArgs, HasModifiers>.ret;
+ field string Asm32 = getAsm32<HasDst, NumSrcArgs>.ret;
+ field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers>.ret;
}
// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
// for the instruction patterns to work.
-def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>;
-def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>;
-def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>;
+def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
+def VOP_F16_I16 : VOPProfile <[f16, i32, untyped, untyped]>;
+def VOP_I16_F16 : VOPProfile <[i32, f16, untyped, untyped]>;
-def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>;
-def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>;
+def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
+def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
+
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>;
def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>;
@@ -1087,25 +1185,76 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> {
+
+// Write out to vcc or arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+ let Asm32 = "$dst, vcc, $src0, $src1";
+ let Asm64 = "$dst, $sdst, $src0, $src1";
+ let Outs32 = (outs DstRC:$dst);
+ let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+}
+
+// Write out to vcc or arbitrary SGPR and read in from vcc or
+// arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+ // We use VCSrc_32 to exclude literal constants: even though the encoding
+ // normally allows them, the implicit VCC use means a literal would always
+ // violate the constant bus restriction. SGPRs are still allowed because it
+ // should technically be possible to use VCC again as src0.
let Src0RC32 = VCSrc_32;
+ let Asm32 = "$dst, vcc, $src0, $src1, vcc";
+ let Asm64 = "$dst, $sdst, $src0, $src1, $src2";
+ let Outs32 = (outs DstRC:$dst);
+ let Outs64 = (outs DstRC:$dst, SReg_64:$sdst);
+
+ // Suppress the src2 implied by the type, since the 32-bit encoding reads
+ // VCC implicitly.
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
}
-def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> {
- let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = "$dst, $src0_modifiers, $src1";
+class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod";
+}
+
+def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VGPR_32>;
}
-def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> {
+def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VReg_64>;
+}
+
+// VOPC instructions are a special case because for the 32-bit
+// encoding, we want to display the implicit vcc write as if it were
+// an explicit $dst.
+class VOPC_Profile<ValueType vt0, ValueType vt1 = vt0> : VOPProfile <[i1, vt0, vt1, untyped]> {
+ let Asm32 = "vcc, $src0, $src1";
+ // The destination for 32-bit encoding is implicit.
+ let HasDst32 = 0;
+}
+
+class VOPC_Class_Profile<ValueType vt> : VOPC_Profile<vt, i32> {
let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
let Asm64 = "$dst, $src0_modifiers, $src1";
}
+def VOPC_I1_F32_F32 : VOPC_Profile<f32>;
+def VOPC_I1_F64_F64 : VOPC_Profile<f64>;
+def VOPC_I1_I32_I32 : VOPC_Profile<i32>;
+def VOPC_I1_I64_I64 : VOPC_Profile<i64>;
+
+def VOPC_I1_F32_I32 : VOPC_Class_Profile<f32>;
+def VOPC_I1_F64_I32 : VOPC_Class_Profile<f64>;
+
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> {
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2);
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2);
let Asm64 = "$dst, $src0, $src1, $src2";
}
@@ -1119,13 +1268,60 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
HasModifiers>.ret;
- let Asm32 = getAsm32<2>.ret;
- let Asm64 = getAsm64<2, HasModifiers>.ret;
+ let Asm32 = getAsm32<1, 2>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers>.ret;
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+class SIInstAlias <string asm, Instruction inst, VOPProfile p> :
+ InstAlias <asm, (inst)>, PredicateControl {
+
+ field bit isCompare;
+ field bit isCommutable;
+
+ let ResultInst =
+ !if (p.HasDst32,
+ !if (!eq(p.NumSrcArgs, 0),
+ // 1 dst, 0 src
+ (inst p.DstRC:$dst),
+ !if (!eq(p.NumSrcArgs, 1),
+ // 1 dst, 1 src
+ (inst p.DstRC:$dst, p.Src0RC32:$src0),
+ !if (!eq(p.NumSrcArgs, 2),
+ // 1 dst, 2 src
+ (inst p.DstRC:$dst, p.Src0RC32:$src0, p.Src1RC32:$src1),
+ // else - unreachable
+ (inst)))),
+ // else
+ !if (!eq(p.NumSrcArgs, 2),
+ // 0 dst, 2 src
+ (inst p.Src0RC32:$src0, p.Src1RC32:$src1),
+ !if (!eq(p.NumSrcArgs, 1),
+ // 0 dst, 1 src
+ (inst p.Src0RC32:$src1),
+ // else
+ // 0 dst, 0 src
+ (inst))));
+}
+
+class SIInstAliasSI <string asm, string op_name, VOPProfile p> :
+ SIInstAlias <asm, !cast<Instruction>(op_name#"_e32_si"), p> {
+ let AssemblerPredicate = SIAssemblerPredicate;
+}
+
+class SIInstAliasVI <string asm, string op_name, VOPProfile p> :
+ SIInstAlias <asm, !cast<Instruction>(op_name#"_e32_vi"), p> {
+ let AssemblerPredicates = [isVI];
+}
+
+multiclass SIInstAliasBuilder <string asm, VOPProfile p> {
+
+ def : SIInstAliasSI <asm, NAME, p>;
+
+ def : SIInstAliasVI <asm, NAME, p>;
+}
class VOP <string opName> {
string OpName = opName;
@@ -1165,20 +1361,22 @@ class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
let AssemblerPredicates = [isVI];
}
-multiclass VOP1_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName> {
- def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
+ string asm = opName#p.Asm32> {
+ def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
- def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
+
+ def _vi : VOP1_Real_vi <opName, op, p.Outs, p.Ins32, asm>;
- def _vi : VOP1_Real_vi <opName, op, outs, ins, asm>;
}
-multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName> {
- def "" : VOP1_Pseudo <outs, ins, pattern, opName>;
+multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
+ string asm = opName#p.Asm32> {
+
+ def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
- def _si : VOP1_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
}
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
@@ -1202,22 +1400,24 @@ class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
let AssemblerPredicates = [isVI];
}
-multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOp> {
- def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+multiclass VOP2SI_m <vop2 op, string opName, VOPProfile p, list<dag> pattern,
+ string revOp> {
+
+ def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
}
-multiclass VOP2_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, string revOp> {
- def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
+multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
+ string revOp> {
+
+ def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
- def _si : VOP2_Real_si <opName, op, outs, ins, asm>;
+ def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
- def _vi : VOP2_Real_vi <opName, op, outs, ins, asm>;
+ def _vi : VOP2_Real_vi <opName, op, p.Outs32, p.Ins32, p.Asm32>;
}
@@ -1250,6 +1450,9 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
+
+ field bit vdst;
+ field bit src0;
}
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
@@ -1295,22 +1498,6 @@ multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
HasMods>;
}
-// VOP3_m without source modifiers
-multiclass VOP3_m_nomods <vop op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, int NumSrcArgs, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
- let src0_modifiers = 0,
- src1_modifiers = 0,
- src2_modifiers = 0,
- clamp = 0,
- omod = 0 in {
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>;
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>;
- }
-}
-
multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, bit HasMods = 1> {
@@ -1335,7 +1522,7 @@ multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
+ bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
@@ -1349,7 +1536,7 @@ multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
+ bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
@@ -1360,54 +1547,41 @@ multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
// No VI instruction. This class is for SI only.
}
-// XXX - Is v_div_scale_{f32|f64} only available in vop3b without
-// option of implicit vcc use?
-multiclass VOP3b_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- // The VOP2 variant puts the carry out into VCC, the VOP3 variant
- // can write it into any SGPR. We currently don't use the carry out,
- // so for now hardcode it to VCC as well.
- let sdst = SIOperand.VCC, Defs = [VCC] in {
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
-
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 0, HasMods>;
- } // End sdst = SIOperand.VCC, Defs = [VCC]
-}
-
-multiclass VOP3b_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit UseFullOp = 0> {
+// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
+// instead of an implicit VCC as in the VOP2b format.
+multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
+ list<dag> pattern, string opName, string revOp,
+ bit HasMods = 1, bit useSrc2Input = 0> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
+ VOP3DisableFields<1, useSrc2Input, HasMods>;
def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName>,
- VOP3DisableFields<1, 1, HasMods>;
+ VOP3DisableFields<1, useSrc2Input, HasMods>;
}
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec, string revOp> {
+ bit HasMods, bit defExec,
+ string revOp, list<SchedReadWrite> sched> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
+ }
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
}
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
}
}
@@ -1432,32 +1606,28 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
}
}
-multiclass VOP1_Helper <vop1 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- bit HasMods> {
+multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
+ list<dag> pat64> {
- defm _e32 : VOP1_m <op, outs, ins32, opName#asm32, pat32, opName>;
+ defm _e32 : VOP1_m <op, opName, p, pat32>;
- defm _e64 : VOP3_1_m <op, outs, ins64, opName#asm64, pat64, opName, HasMods>;
+ defm _e64 : VOP3_1_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
+ p.HasModifiers>;
}
multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP1_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$dst, (node P.Src0VT:$src0))]),
- P.HasModifiers
+ [(set P.DstVT:$dst, (node P.Src0VT:$src0))])
>;
multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> {
- defm _e32 : VOP1SI_m <op, P.Outs, P.Ins32, opName#P.Asm32, [], opName>;
+ defm _e32 : VOP1SI_m <op, opName, P, []>;
defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
@@ -1467,36 +1637,33 @@ multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
opName, P.HasModifiers>;
}
-multiclass VOP2_Helper <vop2 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
+ list<dag> pat64, string revOp> {
- defm _e64 : VOP3_2_m <op,
- outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
- >;
+ defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
+
+ defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
+ revOp, p.HasModifiers>;
}
multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> : VOP2_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
+ revOp
>;
multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> {
- defm _e32 : VOP2SI_m <op, P.Outs, P.Ins32, P.Asm32, [], opName, revOp>;
+
+ defm _e32 : VOP2SI_m <op, opName, P, [], revOp>;
defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
!if(P.HasModifiers,
@@ -1508,58 +1675,55 @@ multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
opName, revOp, P.HasModifiers>;
}
-multiclass VOP2b_Helper <vop2 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
+multiclass VOP2b_Helper <vop2 op, string opName, VOPProfile p,
+ list<dag> pat32, list<dag> pat64,
+ string revOp, bit useSGPRInput> {
- defm _e32 : VOP2_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+ let SchedRW = [Write32Bit, WriteSALU] in {
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+ defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
+ }
- defm _e64 : VOP3b_2_m <op,
- outs, ins64, opName#asm64, pat64, opName, revOp, HasMods
- >;
+ defm _e64 : VOP3b_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
+ opName, revOp, p.HasModifiers, useSGPRInput>;
+ }
}
multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName> : VOP2b_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
+ revOp, !eq(P.NumSrcArgs, 3)
>;
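// A sketch of how the NumSrcArgs-based selection above plays out for the carry
// instructions defined later in this file: a two-source profile leaves the
// VOP3 src2 field disabled, while a three-source profile (carry-in) keeps it
// enabled and adds VCC to the implicit uses of the _e32 form.
//   defm V_ADD_I32  : VOP2bInst <..., "v_add_i32",  VOP2b_I32_I1_I32_I32>;
//   defm V_ADDC_U32 : VOP2bInst <..., "v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
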
// A VOP2 instruction that is VOP3-only on VI.
-multiclass VOP2_VI3_Helper <vop23 op, string opName, dag outs,
- dag ins32, string asm32, list<dag> pat32,
- dag ins64, string asm64, list<dag> pat64,
- string revOp, bit HasMods> {
- defm _e32 : VOP2SI_m <op, outs, ins32, asm32, pat32, opName, revOp>;
+multiclass VOP2_VI3_Helper <vop23 op, string opName, VOPProfile p,
+ list<dag> pat32, list<dag> pat64, string revOp> {
- defm _e64 : VOP3_2_m <op, outs, ins64, opName#asm64, pat64, opName,
- revOp, HasMods>;
+ defm _e32 : VOP2SI_m <op, opName, p, pat32, revOp>;
+
+ defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
+ revOp, p.HasModifiers>;
}
multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag,
string revOp = opName>
: VOP2_VI3_Helper <
- op, opName, P.Outs,
- P.Ins32, P.Asm32, [],
- P.Ins64, P.Asm64,
+ op, opName, P, [],
!if(P.HasModifiers,
[(set P.DstVT:$dst,
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
[(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, P.HasModifiers
+ revOp
>;
multiclass VOP2MADK <vop2 op, string opName, list<dag> pattern = []> {
@@ -1583,64 +1747,75 @@ let isCodeGenOnly = 0 in {
} // End isCodeGenOnly = 0
}
-class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
+class VOPC_Pseudo <dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
-multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, bit DefExec, string revOpName = ""> {
- def "" : VOPC_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOPC<op.SI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
- let Defs = !if(DefExec, [EXEC], []);
- let hasSideEffects = DefExec;
- let AssemblerPredicates = [isSICI];
+multiclass VOPC_m <vopc op, dag ins, string op_asm, list<dag> pattern,
+ string opName, bit DefExec, VOPProfile p,
+ list<SchedReadWrite> sched,
+ string revOpName = "", string asm = opName#"_e32 "#op_asm,
+ string alias_asm = opName#" "#op_asm> {
+ def "" : VOPC_Pseudo <ins, pattern, opName> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = sched;
}
- def _vi : VOPC<op.VI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
- let Defs = !if(DefExec, [EXEC], []);
- let hasSideEffects = DefExec;
- let AssemblerPredicates = [isVI];
- }
+ let AssemblerPredicates = [isSICI] in {
+ def _si : VOPC<op.SI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let hasSideEffects = DefExec;
+ let SchedRW = sched;
+ }
+
+ } // End AssemblerPredicates = [isSICI]
+
+ let AssemblerPredicates = [isVI] in {
+ def _vi : VOPC<op.VI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let hasSideEffects = DefExec;
+ let SchedRW = sched;
+ }
+
+ } // End AssemblerPredicates = [isVI]
+
+ defm : SIInstAliasBuilder<alias_asm, p>;
}
-multiclass VOPC_Helper <vopc op, string opName,
- dag ins32, string asm32, list<dag> pat32,
- dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec, string revOp> {
- defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
+multiclass VOPC_Helper <vopc op, string opName, list<dag> pat32,
+ list<dag> pat64, bit DefExec, string revOp,
+ VOPProfile p, list<SchedReadWrite> sched> {
+ defm _e32 : VOPC_m <op, p.Ins32, p.Asm32, pat32, opName, DefExec, p, sched>;
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>;
+ defm _e64 : VOP3_C_m <op, (outs VOPDstS64:$dst), p.Ins64, opName#p.Asm64, pat64,
+ opName, p.HasModifiers, DefExec, revOp, sched>;
}
// Special case for class instructions which only have modifiers on
// the 1st source operand.
-multiclass VOPC_Class_Helper <vopc op, string opName,
- dag ins32, string asm32, list<dag> pat32,
- dag out64, dag ins64, string asm64, list<dag> pat64,
- bit HasMods, bit DefExec, string revOp> {
- defm _e32 : VOPC_m <op, (outs), ins32, opName#asm32, pat32, opName, DefExec>;
-
- defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>,
+multiclass VOPC_Class_Helper <vopc op, string opName, list<dag> pat32,
+ list<dag> pat64, bit DefExec, string revOp,
+ VOPProfile p, list<SchedReadWrite> sched> {
+ defm _e32 : VOPC_m <op, p.Ins32, p.Asm32, pat32, opName, DefExec, p, sched>;
+
+ defm _e64 : VOP3_C_m <op, (outs VOPDstS64:$dst), p.Ins64, opName#p.Asm64, pat64,
+ opName, p.HasModifiers, DefExec, revOp, sched>,
VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
string revOp = opName,
- bit DefExec = 0> : VOPC_Helper <
- op, opName,
- P.Ins32, P.Asm32, [],
- (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
+ bit DefExec = 0,
+ list<SchedReadWrite> sched = [Write32Bit]> :
+ VOPC_Helper <
+ op, opName, [],
!if(P.HasModifiers,
[(set i1:$dst,
(setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -1648,51 +1823,51 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec, revOp
+ DefExec, revOp, P, sched
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
- bit DefExec = 0> : VOPC_Class_Helper <
- op, opName,
- P.Ins32, P.Asm32, [],
- (outs VOPDstS64:$dst), P.Ins64, P.Asm64,
+ bit DefExec = 0,
+ list<SchedReadWrite> sched> : VOPC_Class_Helper <
+ op, opName, [],
!if(P.HasModifiers,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec, opName
+ DefExec, opName, P, sched
>;
multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_F32_F32_F32, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_F32_F32, cond, revOp>;
multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_F64_F64_F64, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_F64_F64, cond, revOp, 0, [WriteDoubleAdd]>;
multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_I32_I32_I32, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_I32_I32, cond, revOp>;
multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOP_I64_I64_I64, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_I64_I64, cond, revOp, 0, [Write64Bit]>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
PatLeaf cond = COND_NULL,
+ list<SchedReadWrite> sched,
string revOp = "">
- : VOPCInst <op, opName, P, cond, revOp, 1>;
+ : VOPCInst <op, opName, P, cond, revOp, 1, sched>;
multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_F32_F32_F32, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_F32_F32, COND_NULL, [Write32Bit], revOp>;
multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_F64_F64_F64, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_F64_F64, COND_NULL, [WriteDoubleAdd], revOp>;
multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_I32_I32_I32, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_I32_I32, COND_NULL, [Write32Bit], revOp>;
multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOP_I64_I64_I64, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_I64_I64, COND_NULL, [Write64Bit], revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
@@ -1700,16 +1875,16 @@ multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F32_I32, 0>;
+ VOPCClassInst <op, opName, VOPC_I1_F32_I32, 0, [Write32Bit]>;
multiclass VOPCX_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F32_I32, 1>;
+ VOPCClassInst <op, opName, VOPC_I1_F32_I32, 1, [Write32Bit]>;
multiclass VOPC_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F64_I32, 0>;
+ VOPCClassInst <op, opName, VOPC_I1_F64_I32, 0, [WriteDoubleAdd]>;
multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOP_I1_F64_I32, 1>;
+ VOPCClassInst <op, opName, VOPC_I1_F64_I32, 1, [WriteDoubleAdd]>;
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
@@ -1761,25 +1936,13 @@ multiclass VOP3_VCC_Inst <vop3 op, string opName,
3, 1
>;
-multiclass VOP3b_Helper <vop op, RegisterClass vrc, RegisterOperand arc,
- string opName, list<dag> pattern> :
- VOP3b_3_m <
- op, (outs vrc:$vdst, SReg_64:$sdst),
- (ins InputModsNoDefault:$src0_modifiers, arc:$src0,
- InputModsNoDefault:$src1_modifiers, arc:$src1,
- InputModsNoDefault:$src2_modifiers, arc:$src2,
- ClampMod:$clamp, omod:$omod),
- opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern,
- opName, opName, 1, 1
+multiclass VOP3bInst <vop op, string opName, VOPProfile P, list<dag> pattern = []> :
+ VOP3b_2_3_m <
+ op, P.Outs64, P.Ins64,
+ opName#" "#P.Asm64, pattern,
+ opName, "", 1, 1
>;
-multiclass VOP3b_64 <vop3 op, string opName, list<dag> pattern> :
- VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
-
-multiclass VOP3b_32 <vop3 op, string opName, list<dag> pattern> :
- VOP3b_Helper <op, VGPR_32, VSrc_32, opName, pattern>;
-
-
class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
(node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
@@ -1925,12 +2088,14 @@ multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
+ let hasPostISelHook = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
- let data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ let data1 = 0 in {
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
}
}
@@ -1939,11 +2104,13 @@ multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
dag outs = (outs rc:$vdst),
string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
+ let hasPostISelHook = 1 in {
+ def "" : DS_Pseudo <opName, outs, ins, []>,
+ AtomicNoRet<noRetOp, 1>;
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
+ def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
+ }
}
multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
@@ -2214,7 +2381,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_addr64", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+ (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
>;
@@ -2233,7 +2400,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
op, name#"_rtn_addr64", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
+ (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
[(set vt:$vdata,
@@ -2245,7 +2412,7 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
op, name#"_rtn_offset", (outs rc:$vdata),
(ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
mbuf_offset:$offset, slc:$slc),
- name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
+ name#" $vdata, $srsrc, $soffset"#"$offset"#" glc$slc",
[(set vt:$vdata,
(atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
i1:$slc), vt:$vdata_in))], 1
@@ -2256,6 +2423,8 @@ multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
} // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
}
+// FIXME: tfe can't be an operand because it requires a separate
+// opcode: it needs an N+1 register class dest register.
multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> {
@@ -2368,47 +2537,121 @@ multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
} // End mayLoad = 0, mayStore = 1
}
-class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
- FLAT <op, (outs regClass:$vdst),
- (ins VReg_64:$addr, glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
- asm#" $vdst, $addr"#"$glc"#"$slc"#"$tfe", []> {
- let data = 0;
- let mayLoad = 1;
+// For cache invalidation instructions.
+multiclass MUBUF_Invalidate <mubuf op, string opName, SDPatternOperator node> {
+ let hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" in {
+ def "" : MUBUF_Pseudo <opName, (outs), (ins), [(node)]>;
+
+ // Set everything to 0.
+ let offset = 0, offen = 0, idxen = 0, glc = 0, vaddr = 0,
+ vdata = 0, srsrc = 0, slc = 0, tfe = 0, soffset = 0 in {
+ let addr64 = 0 in {
+ def _si : MUBUF_Real_si <op, opName, (outs), (ins), opName>;
+ }
+
+ def _vi : MUBUF_Real_vi <op, opName, (outs), (ins), opName>;
+ }
+ } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = ""
}
-class FLAT_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
- FLAT <op, (outs), (ins vdataClass:$data, VReg_64:$addr,
- glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
- name#" $data, $addr"#"$glc"#"$slc"#"$tfe",
- []> {
+//===----------------------------------------------------------------------===//
+// FLAT classes
+//===----------------------------------------------------------------------===//
+
+class flat <bits<7> ci, bits<7> vi = ci> {
+ field bits<7> CI = ci;
+ field bits<7> VI = vi;
+}
- let mayLoad = 0;
- let mayStore = 1;
+class FLAT_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
+ FLAT <0, outs, ins, "", pattern>,
+ SIMCInstr<opName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+}
- // Encoding
- let vdst = 0;
+class FLAT_Real_ci <bits<7> op, string opName, dag outs, dag ins, string asm> :
+ FLAT <op, outs, ins, asm, []>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicate = isCIOnly;
}
-multiclass FLAT_ATOMIC <bits<7> op, string name, RegisterClass vdst_rc,
- RegisterClass data_rc = vdst_rc> {
+class FLAT_Real_vi <bits<7> op, string opName, dag outs, dag ins, string asm> :
+ FLAT <op, outs, ins, asm, []>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicate = VIAssemblerPredicate;
+}
- let mayLoad = 1, mayStore = 1 in {
- def "" : FLAT <op, (outs),
- (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
- tfe_flat_atomic:$tfe),
- name#" $addr, $data"#"$slc"#"$tfe", []>,
- AtomicNoRet <NAME, 0> {
- let glc = 0;
- let vdst = 0;
- }
+multiclass FLAT_AtomicRet_m <flat op, dag outs, dag ins, string asm,
+ list<dag> pattern> {
+ def "" : FLAT_Pseudo <NAME#"_RTN", outs, ins, pattern>,
+ AtomicNoRet <NAME, 1>;
- def _RTN : FLAT <op, (outs vdst_rc:$vdst),
- (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
- tfe_flat_atomic:$tfe),
- name#" $vdst, $addr, $data glc"#"$slc"#"$tfe", []>,
- AtomicNoRet <NAME, 1> {
- let glc = 1;
- }
+ def _ci : FLAT_Real_ci <op.CI, NAME#"_RTN", outs, ins, asm>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME#"_RTN", outs, ins, asm>;
+}
+
+multiclass FLAT_Load_Helper <flat op, string asm_name,
+ RegisterClass regClass,
+ dag outs = (outs regClass:$vdst),
+ dag ins = (ins VReg_64:$addr, glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
+ string asm = asm_name#" $vdst, $addr"#"$glc"#"$slc"#"$tfe"> {
+
+ let data = 0, mayLoad = 1 in {
+
+ def "" : FLAT_Pseudo <NAME, outs, ins, []>;
+
+ def _ci : FLAT_Real_ci <op.CI, NAME, outs, ins, asm>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME, outs, ins, asm>;
+ }
+}
+
+multiclass FLAT_Store_Helper <flat op, string asm_name,
+ RegisterClass vdataClass,
+ dag outs = (outs),
+ dag ins = (ins vdataClass:$data, VReg_64:$addr, glc_flat:$glc,
+ slc_flat:$slc, tfe_flat:$tfe),
+ string asm = asm_name#" $data, $addr"#"$glc"#"$slc"#"$tfe"> {
+
+ let mayLoad = 0, mayStore = 1, vdst = 0 in {
+
+ def "" : FLAT_Pseudo <NAME, outs, ins, []>;
+
+ def _ci : FLAT_Real_ci <op.CI, NAME, outs, ins, asm>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME, outs, ins, asm>;
+ }
+}
+
+multiclass FLAT_ATOMIC <flat op, string asm_name, RegisterClass vdst_rc,
+ RegisterClass data_rc = vdst_rc,
+ dag outs_noret = (outs),
+ string asm_noret = asm_name#" $addr, $data"#"$slc"#"$tfe"> {
+
+ let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0 in {
+ def "" : FLAT_Pseudo <NAME, outs_noret,
+ (ins VReg_64:$addr, data_rc:$data,
+ slc_flat_atomic:$slc, tfe_flat_atomic:$tfe), []>,
+ AtomicNoRet <NAME, 0>;
+
+ def _ci : FLAT_Real_ci <op.CI, NAME, outs_noret,
+ (ins VReg_64:$addr, data_rc:$data,
+ slc_flat_atomic:$slc, tfe_flat_atomic:$tfe),
+ asm_noret>;
+
+ def _vi : FLAT_Real_vi <op.VI, NAME, outs_noret,
+ (ins VReg_64:$addr, data_rc:$data,
+ slc_flat_atomic:$slc, tfe_flat_atomic:$tfe),
+ asm_noret>;
+ }
+
+ let glc = 1, hasPostISelHook = 1 in {
+ defm _RTN : FLAT_AtomicRet_m <op, (outs vdst_rc:$vdst),
+ (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
+ tfe_flat_atomic:$tfe),
+ asm_name#" $vdst, $addr, $data glc"#"$slc"#"$tfe", []>;
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index e0eeea9..6f653c7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -30,7 +30,9 @@ def isGCN : Predicate<"Subtarget->getGeneration() "
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureGCN">;
def isSI : Predicate<"Subtarget->getGeneration() "
- "== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ "== AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureSouthernIslands">;
+
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
@@ -62,36 +64,38 @@ let mayLoad = 1 in {
// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SGPR_32 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>;
+defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "s_buffer_load_dword", SReg_128, SGPR_32
+ smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32
>;
defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64
+ smrd<0x09>, "s_buffer_load_dwordx2", SReg_128, SReg_64
>;
defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128
+ smrd<0x0a>, "s_buffer_load_dwordx4", SReg_128, SReg_128
>;
defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256
+ smrd<0x0b>, "s_buffer_load_dwordx8", SReg_128, SReg_256
>;
defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512
+ smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
} // mayLoad = 1
//def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>;
+
+defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
+ int_amdgcn_s_dcache_inv>;
//===----------------------------------------------------------------------===//
// SOP1 Instructions
@@ -123,7 +127,7 @@ let Defs = [SCC] in {
defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
- [(set i32:$dst, (AMDGPUbrev i32:$src0))]
+ [(set i32:$dst, (bitreverse i32:$src0))]
>;
defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
@@ -183,10 +187,14 @@ defm S_XNOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2b, 0x27>, "s_xnor_saveexec_b64", []>
defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>;
defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>;
+
+let Uses = [M0] in {
defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
+} // End Uses = [M0]
+
defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
let Defs = [SCC] in {
@@ -354,7 +362,7 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">;
// SOPK Instructions
//===----------------------------------------------------------------------===//
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isMoveImm = 1 in {
defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>;
} // End isReMaterializable = 1
let Uses = [SCC] in {
@@ -438,36 +446,38 @@ def S_BRANCH : SOPP <
let isBarrier = 1;
}
-let DisableEncoding = "$scc" in {
+let Uses = [SCC] in {
def S_CBRANCH_SCC0 : SOPP <
- 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ 0x00000004, (ins sopp_brtarget:$simm16),
"s_cbranch_scc0 $simm16"
>;
def S_CBRANCH_SCC1 : SOPP <
- 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc),
+ 0x00000005, (ins sopp_brtarget:$simm16),
"s_cbranch_scc1 $simm16"
>;
-} // End DisableEncoding = "$scc"
+} // End Uses = [SCC]
+let Uses = [VCC] in {
def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ 0x00000006, (ins sopp_brtarget:$simm16),
"s_cbranch_vccz $simm16"
>;
def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc),
+ 0x00000007, (ins sopp_brtarget:$simm16),
"s_cbranch_vccnz $simm16"
>;
+} // End Uses = [VCC]
-let DisableEncoding = "$exec" in {
+let Uses = [EXEC] in {
def S_CBRANCH_EXECZ : SOPP <
- 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ 0x00000008, (ins sopp_brtarget:$simm16),
"s_cbranch_execz $simm16"
>;
def S_CBRANCH_EXECNZ : SOPP <
- 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec),
+ 0x00000009, (ins sopp_brtarget:$simm16),
"s_cbranch_execnz $simm16"
>;
-} // End DisableEncoding = "$exec"
+} // End Uses = [EXEC]
} // End isBranch = 1
@@ -477,11 +487,11 @@ let hasSideEffects = 1 in {
def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
[(int_AMDGPU_barrier_local)]
> {
+ let SchedRW = [WriteBarrier];
let simm16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
let mayLoad = 1;
let mayStore = 1;
+ let isConvergent = 1;
}
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
@@ -805,9 +815,6 @@ defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmps
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-let SubtargetPredicate = isCI in {
-defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
-} // End isCI
defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
let mayStore = 0 in {
defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
@@ -905,11 +912,6 @@ defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">;
defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
-//let SubtargetPredicate = isCI in {
-// DS_CONDXCHG32_RTN_B64
-// DS_CONDXCHG32_RTN_B128
-//} // End isCI
-
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
@@ -951,13 +953,13 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load
+ mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, mubuf_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load
+ mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load
+ mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
@@ -1034,9 +1036,12 @@ defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
-//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
-//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
-//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;
+
+let SubtargetPredicate = isSI in {
+defm BUFFER_WBINVL1_SC : MUBUF_Invalidate <mubuf<0x70>, "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI
+}
+
+defm BUFFER_WBINVL1 : MUBUF_Invalidate <mubuf<0x71, 0x3e>, "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>;
//===----------------------------------------------------------------------===//
// MTBUF Instructions
@@ -1155,8 +1160,8 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
// VOP1 Instructions
//===----------------------------------------------------------------------===//
-let vdst = 0, src0 = 0 in {
-defm V_NOP : VOP1_m <vop1<0x0>, (outs), (ins), "v_nop", [], "v_nop">;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
}
let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1292,7 +1297,9 @@ defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
VOP_F64_F64, fsqrt
>;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDouble]
+
+let SchedRW = [WriteQuarterRate32] in {
defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
VOP_F32_F32, AMDGPUsin
@@ -1300,6 +1307,9 @@ defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
VOP_F32_F32, AMDGPUcos
>;
+
+} // End SchedRW = [WriteQuarterRate32]
+
defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
@@ -1308,24 +1318,33 @@ defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
VOP_I32_F64
>;
+
+let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
VOP_F64_F64
>;
-defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;
+
+defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
+ VOP_F64_F64
+>;
+} // End SchedRW = [WriteDoubleAdd]
+
+
defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
VOP_I32_F32
>;
defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
VOP_F32_F32
>;
-let vdst = 0, src0 = 0 in {
-defm V_CLREXCP : VOP1_m <vop1<0x41,0x35>, (outs), (ins), "v_clrexcp", [],
- "v_clrexcp"
->;
+let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
+defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NONE>;
}
+
+let Uses = [M0, EXEC] in {
defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_I32_I32>;
defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_I32_I32>;
defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_I32_I32>;
+} // End Uses = [M0, EXEC]
// These instruction only exist on SI and CI
let SubtargetPredicate = isSICI in {
@@ -1343,7 +1362,7 @@ defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
VOP_F32_F32, AMDGPUrsq_legacy
>;
-} // End let SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteDouble] in {
@@ -1360,7 +1379,7 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
// VINTRP Instructions
//===----------------------------------------------------------------------===//
-let Uses = [M0] in {
+let Uses = [M0, EXEC] in {
// FIXME: Specify SchedRW for VINTRP instructions.
@@ -1405,16 +1424,14 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
[(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
(i32 imm:$attr)))]>;
-} // End Uses = [M0]
+} // End Uses = [M0, EXEC]
//===----------------------------------------------------------------------===//
// VOP2 Instructions
//===----------------------------------------------------------------------===//
multiclass V_CNDMASK <vop2 op, string name> {
- defm _e32 : VOP2_m <
- op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [],
- name, name>;
+ defm _e32 : VOP2_m <op, name, VOP_CNDMASK, [], name>;
defm _e64 : VOP3_m <
op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64,
@@ -1500,34 +1517,32 @@ let isCommutable = 1 in {
defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
} // End isCommutable = 1
-let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
+let isCommutable = 1 in {
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
// but the VI instructions behave the same as the SI versions.
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
- VOP_I32_I32_I32, add
+ VOP2b_I32_I1_I32_I32
>;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
+defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
- VOP_I32_I32_I32, null_frag, "v_sub_i32"
+ VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
>;
-let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_I1
>;
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP_I32_I32_I32_VCC
+ VOP2b_I32_I1_I32_I32_I1
>;
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
- VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
+ VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
>;
-} // End Uses = [VCC]
-} // End isCommutable = 1, Defs = [VCC]
+} // End isCommutable = 1
defm V_READLANE_B32 : VOP2SI_3VI_m <
vop3 <0x001, 0x289>,
@@ -1575,10 +1590,10 @@ defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
VOP_I32_I32_I32
>;
defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
- VOP_I32_I32_I32
+ VOP_I32_I32_I32, int_amdgcn_mbcnt_lo
>;
defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
- VOP_I32_I32_I32
+ VOP_I32_I32_I32, int_amdgcn_mbcnt_hi
>;
defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
@@ -1704,15 +1719,15 @@ defm V_DIV_FIXUP_F32 : VOP3Inst <
vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
>;
-let SchedRW = [WriteDouble] in {
+let SchedRW = [WriteDoubleAdd] in {
defm V_DIV_FIXUP_F64 : VOP3Inst <
vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
>;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDoubleAdd]
-let SchedRW = [WriteDouble] in {
+let SchedRW = [WriteDoubleAdd] in {
let isCommutable = 1 in {
defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64",
@@ -1735,7 +1750,7 @@ defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
VOP_F64_F64_I32, AMDGPUldexp
>;
-} // let SchedRW = [WriteDouble]
+} // let SchedRW = [WriteDoubleAdd]
let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
@@ -1756,16 +1771,21 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteFloatFMA, WriteSALU] in {
-defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
+defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32",
+ VOP3b_F32_I1_F32_F32_F32
+>;
}
let SchedRW = [WriteDouble, WriteSALU] in {
// Double precision division pre-scale.
-defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
+defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64",
+ VOP3b_F64_I1_F64_F64_F64
+>;
} // let SchedRW = [WriteDouble]
-let isCommutable = 1, Uses = [VCC] in {
+let isCommutable = 1, Uses = [VCC, EXEC] in {
+let SchedRW = [WriteFloatFMA] in {
// v_div_fmas_f32:
// result = src0 * src1 + src2
// if (vcc)
@@ -1774,6 +1794,7 @@ let isCommutable = 1, Uses = [VCC] in {
defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
+}
let SchedRW = [WriteDouble] in {
// v_div_fmas_f64:
@@ -1786,7 +1807,7 @@ defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
>;
} // End SchedRW = [WriteDouble]
-} // End isCommutable = 1
+} // End isCommutable = 1, Uses = [VCC, EXEC]
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
@@ -1835,13 +1856,13 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst),
(ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []
>;
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, SALU = 1 in {
def SGPR_USE : InstSI <(outs),(ins), "", []>;
}
@@ -1921,39 +1942,9 @@ def SI_KILL : InstSI <
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
-//defm SI_ : RegisterLoadStore <VGPR_32, FRAMEri, ADDRIndirect>;
-
-let UseNamedOperandTable = 1 in {
-
-def SI_RegisterLoad : InstSI <
+class SI_INDIRECT_SRC<RegisterClass rc> : InstSI <
(outs VGPR_32:$dst, SReg_64:$temp),
- (ins FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterLoad = 1;
- let mayLoad = 1;
-}
-
-class SIRegStore<dag outs> : InstSI <
- outs,
- (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan),
- "", []
-> {
- let isRegisterStore = 1;
- let mayStore = 1;
-}
-
-let usesCustomInserter = 1 in {
-def SI_RegisterStorePseudo : SIRegStore<(outs)>;
-} // End usesCustomInserter = 1
-def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>;
-
-
-} // End UseNamedOperandTable = 1
-
-def SI_INDIRECT_SRC : InstSI <
- (outs VGPR_32:$dst, SReg_64:$temp),
- (ins unknown:$src, VSrc_32:$idx, i32imm:$off),
+ (ins rc:$src, VSrc_32:$idx, i32imm:$off),
"si_indirect_src $dst, $temp, $src, $idx, $off",
[]
>;
@@ -1967,6 +1958,13 @@ class SI_INDIRECT_DST<RegisterClass rc> : InstSI <
let Constraints = "$src = $dst";
}
+// TODO: We can support indirect SGPR access.
+def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
+def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
+def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
+def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
+def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC<VReg_512>;
+
def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
@@ -1977,19 +1975,24 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
- let UseNamedOperandTable = 1 in {
+ let UseNamedOperandTable = 1, Uses = [EXEC] in {
def _SAVE : InstSI <
(outs),
- (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
- SReg_32:$scratch_offset),
+ (ins sgpr_class:$src, i32imm:$frame_idx),
"", []
- >;
+ > {
+ let mayStore = 1;
+ let mayLoad = 0;
+ }
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx),
"", []
- >;
+ > {
+ let mayStore = 0;
+ let mayLoad = 1;
+ }
} // End UseNamedOperandTable = 1
}
@@ -2003,19 +2006,25 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1, VGPRSpill = 1 in {
+ let UseNamedOperandTable = 1, VGPRSpill = 1, Uses = [EXEC] in {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
- >;
+ > {
+ let mayStore = 1;
+ let mayLoad = 0;
+ }
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
- >;
+ > {
+ let mayStore = 0;
+ let mayLoad = 1;
+ }
} // End UseNamedOperandTable = 1, VGPRSpill = 1
}
@@ -2030,9 +2039,11 @@ let Defs = [SCC] in {
def SI_CONSTDATA_PTR : InstSI <
(outs SReg_64:$dst),
- (ins),
- "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))]
->;
+ (ins const_ga:$ptr),
+ "", [(set SReg_64:$dst, (i64 (SIconstdata_ptr (tglobaladdr:$ptr))))]
+> {
+ let SALU = 1;
+}
} // End Defs = [SCC]
@@ -2072,84 +2083,63 @@ def : Pat <
// SMRD Patterns
//===----------------------------------------------------------------------===//
-multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+multiclass SMRD_Pattern <string Instr, ValueType vt> {
- // 1. SI-CI: Offset as 8bit DWORD immediate
+ // 1. IMM offset
def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
- (vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
+ (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
+ (vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))
>;
- // 2. Offset loaded in an 32bit SGPR
+ // 2. SGPR offset
def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
- (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
+ (vt (!cast<SMRD>(Instr#"_SGPR") $sbase, $offset))
>;
- // 3. No offset at all
def : Pat <
- (constant_load i64:$sbase),
- (vt (Instr_IMM $sbase, 0))
- >;
+ (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
+ (vt (!cast<SMRD>(Instr#"_IMM_ci") $sbase, $offset))
+ > {
+ let Predicates = [isCIOnly];
+ }
}
-multiclass SMRD_Pattern_vi <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
-
- // 1. VI: Offset as 20bit immediate in bytes
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))),
- (vt (Instr_IMM $sbase, (as_i32imm $offset)))
- >;
-
- // 2. Offset loaded in an 32bit SGPR
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
- (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
- >;
-
- // 3. No offset at all
- def : Pat <
- (constant_load i64:$sbase),
- (vt (Instr_IMM $sbase, 0))
- >;
-}
-
-let Predicates = [isSICI] in {
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
-} // End Predicates = [isSICI]
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
-let Predicates = [isVI] in {
-defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
-defm : SMRD_Pattern_vi <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
-} // End Predicates = [isVI]
+defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v32i8>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
-let Predicates = [isSICI] in {
+// 1. Offset as an immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset)
+>;
-// 1. Offset as 8bit DWORD immediate
+// 2. Offset loaded in a 32-bit SGPR
def : Pat <
- (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
+ (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset)
>;
-} // End Predicates = [isSICI]
+let Predicates = [isCI] in {
-// 2. Offset loaded in an 32bit SGPR
def : Pat <
- (SIload_constant v4i32:$sbase, imm:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
+ (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)
>;
+} // End Predicates = [isCI]
+
+} // End let AddedComplexity = 100
+
//===----------------------------------------------------------------------===//
// SOP1 Patterns
//===----------------------------------------------------------------------===//
@@ -2161,6 +2151,11 @@ def : Pat <
(S_MOV_B32 0), sub1))
>;
+def : Pat <
+ (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
+ (S_ABS_I32 $x)
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
@@ -2488,6 +2483,11 @@ def : Pat <
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
+//def : Extract_Element<i64, v2i64, 0, sub0_sub1>;
+//def : Extract_Element<i64, v2i64, 1, sub2_sub3>;
+//def : Extract_Element<f64, v2f64, 0, sub0_sub1>;
+//def : Extract_Element<f64, v2f64, 1, sub2_sub3>;
+
foreach Index = 0-2 in {
def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -2568,11 +2568,25 @@ def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <i64, v2i32, VReg_64>;
def : BitConvert <v2f32, i64, VReg_64>;
def : BitConvert <i64, v2f32, VReg_64>;
+def : BitConvert <v2f32, f64, VReg_64>;
def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <f64, v2f32, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
+
+def : BitConvert <v2i64, v4i32, SReg_128>;
+def : BitConvert <v4i32, v2i64, SReg_128>;
+
+def : BitConvert <v2f64, v4f32, VReg_128>;
+def : BitConvert <v2f64, v4i32, VReg_128>;
+def : BitConvert <v4f32, v2f64, VReg_128>;
+def : BitConvert <v4i32, v2f64, VReg_128>;
+
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
def : BitConvert <v8i32, v32i8, SReg_256>;
@@ -2601,10 +2615,9 @@ def : Pat <
// Prevent expanding both fneg and fabs.
-// FIXME: Should use S_OR_B32
def : Pat <
(fneg (fabs f32:$src)),
- (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
+ (S_OR_B32 $src, 0x80000000) /* Set sign bit */
>;
// FIXME: Should use S_OR_B32
@@ -2836,10 +2849,6 @@ class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
// -1. For the non-rtn variants, the manual says it does
// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max
// will always do the increment so I'm assuming it's the same.
-//
-// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
-// needs to be a VGPR. The SGPR copy pass will fix this, and it's
-// easier since there is no v_mov_b64.
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
@@ -2855,9 +2864,9 @@ class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
// 32-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_add_local>;
+ V_MOV_B32_e32, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
- S_MOV_B32, si_atomic_load_sub_local>;
+ V_MOV_B32_e32, si_atomic_load_sub_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
@@ -2874,9 +2883,9 @@ def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
// 64-bit atomics.
def : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_add_local>;
+ V_MOV_B64_PSEUDO, si_atomic_load_add_local>;
def : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
- S_MOV_B64, si_atomic_load_sub_local>;
+ V_MOV_B64_PSEUDO, si_atomic_load_sub_local>;
def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
@@ -3019,90 +3028,46 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
-let SubtargetPredicate = isCI in {
-
-defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
- VOP_I32_I32_I32
->;
-
-let isCommutable = 1 in {
-defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
- VOP_I64_I32_I32_I64
->;
-
-// XXX - Does this set VCC?
-defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
- VOP_I64_I32_I32_I64
->;
-} // End isCommutable = 1
-
-// Remaining instructions:
-// FLAT_*
-// S_CBRANCH_CDBGUSER
-// S_CBRANCH_CDBGSYS
-// S_CBRANCH_CDBGSYS_OR_USER
-// S_CBRANCH_CDBGSYS_AND_USER
-// S_DCACHE_INV_VOL
-// DS_NOP
-// DS_GWS_SEMA_RELEASE_ALL
-// DS_WRAP_RTN_B32
-// DS_CNDXCHG32_RTN_B64
-// DS_WRITE_B96
-// DS_WRITE_B128
-// DS_CONDXCHG32_RTN_B128
-// DS_READ_B96
-// DS_READ_B128
-// BUFFER_LOAD_DWORDX3
-// BUFFER_STORE_DWORDX3
-
-} // End isCI
-
/********** ====================== **********/
/********** Indirect addressing **********/
/********** ====================== **********/
-multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
+multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
// 1. Extract with offset
def : Pat<
- (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))),
- (SI_INDIRECT_SRC $vec, $idx, imm:$off)
+ (eltvt (extractelt vt:$vec, (add i32:$idx, imm:$off))),
+ (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, imm:$off)
>;
// 2. Extract without offset
def : Pat<
- (eltvt (vector_extract vt:$vec, i32:$idx)),
- (SI_INDIRECT_SRC $vec, $idx, 0)
+ (eltvt (extractelt vt:$vec, i32:$idx)),
+ (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, 0)
>;
// 3. Insert with offset
def : Pat<
- (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
- (IndDst $vec, $idx, imm:$off, $val)
+ (insertelt vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
+ (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
- (vector_insert vt:$vec, eltvt:$val, i32:$idx),
- (IndDst $vec, $idx, 0, $val)
+ (insertelt vt:$vec, eltvt:$val, i32:$idx),
+ (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $vec, $idx, 0, $val)
>;
}
-defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
+defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
+defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
+defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
-defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
-defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
-defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
-defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
+defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
+defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
+defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
+defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
//===----------------------------------------------------------------------===//
// Conversion Patterns
@@ -3215,12 +3180,12 @@ def : Pat <
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
+ (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1)
>;
def : Pat <
(i1 (trunc i64:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
+ (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),
(EXTRACT_SUBREG $a, sub0)), 1)
>;
@@ -3301,24 +3266,6 @@ def : Pat <
} // End Predicates = [isSI]
-let Predicates = [isCI] in {
-
-// Convert (x - floor(x)) to fract(x)
-def : Pat <
- (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
- (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
- (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-// Convert (x + (-floor(x))) to fract(x)
-def : Pat <
- (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
- (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
- (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
->;
-
-} // End Predicates = [isCI]
-
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
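
Two of the new scalar selection patterns above rest on simple bit-level identities: S_ABS_I32 is matched from smax(x, ineg(x)), and fneg(fabs(x)) is lowered to a scalar OR of the sign bit. A minimal host-side sketch of the same folds in plain C++ (the function names are illustrative and not part of the patch):

#include <cstdint>
#include <cstring>

// smax(x, -x) == |x| in wrapping 32-bit arithmetic (the S_ABS_I32 fold).
// The x == INT32_MIN case wraps to INT32_MIN on both sides, matching the
// hardware behaviour; unsigned negation keeps the C++ well defined.
static int32_t absViaSmax(int32_t x) {
  int32_t negX = static_cast<int32_t>(0u - static_cast<uint32_t>(x));
  return x > negX ? x : negX;
}

// fneg(fabs(f)) only has to force the sign bit to 1, which is why a single
// scalar S_OR_B32 with 0x80000000 is sufficient.
static float negAbs(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  bits |= 0x80000000u;
  std::memcpy(&f, &bits, sizeof(bits));
  return f;
}
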
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index c319b32..126f624 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -103,6 +103,10 @@ public:
return "SI Lower control flow instructions";
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
@@ -140,8 +144,7 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) {
DebugLoc DL = From.getDebugLoc();
BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addOperand(To)
- .addReg(AMDGPU::EXEC);
+ .addOperand(To);
}
void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
@@ -159,8 +162,7 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
// If the exec mask is non-zero, skip the next two instructions
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(3)
- .addReg(AMDGPU::EXEC);
+ .addImm(3);
// Exec mask is zero: Export to NULL target...
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
@@ -269,8 +271,7 @@ void SILowerControlFlowPass::Loop(MachineInstr &MI) {
.addReg(Src);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addOperand(MI.getOperand(1))
- .addReg(AMDGPU::EXEC);
+ .addOperand(MI.getOperand(1));
MI.eraseFromParent();
}
@@ -316,7 +317,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
.addImm(0);
}
} else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
.addImm(0)
.addOperand(Op);
}
@@ -362,9 +363,9 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int
.addReg(AMDGPU::VCC_LO);
// Compare the just read M0 value to all possible Idx values
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
- .addReg(AMDGPU::M0)
- .addReg(Idx);
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
// Update EXEC, save the original EXEC value to VCC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
@@ -385,8 +386,7 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(-7)
- .addReg(AMDGPU::EXEC);
+ .addImm(-7);
// Restore EXEC
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
@@ -438,7 +438,6 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(Reg)
- .addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
LoadM0(MI, MovRel, Off);
@@ -460,7 +459,6 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
.addReg(Reg, RegState::Define)
.addReg(Val)
- .addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
LoadM0(MI, MovRel, Off);
@@ -486,11 +484,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
+ if (TII->isWQM(MI) || TII->isDS(MI))
NeedWQM = true;
// Flat uses m0 in case it needs to access LDS.
- if (TII->isFLAT(MI.getOpcode()))
+ if (TII->isFLAT(MI))
NeedFlat = true;
switch (MI.getOpcode()) {
@@ -541,7 +539,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Branch(MI);
break;
- case AMDGPU::SI_INDIRECT_SRC:
+ case AMDGPU::SI_INDIRECT_SRC_V1:
+ case AMDGPU::SI_INDIRECT_SRC_V2:
+ case AMDGPU::SI_INDIRECT_SRC_V4:
+ case AMDGPU::SI_INDIRECT_SRC_V8:
+ case AMDGPU::SI_INDIRECT_SRC_V16:
IndirectSrc(MI);
break;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 67421e2..a2fa5fd 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -48,6 +48,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
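
Both lowering passes touched here now declare what they leave intact: SILowerControlFlow marks the CFG as preserved, and SILowerI1Copies additionally preserves the MachineDominatorTree it requires, so the pass manager does not have to recompute it afterwards. A self-contained sketch of that getAnalysisUsage idiom (ExamplePass is illustrative, not part of the patch):

#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
// Requiring an analysis and promising to preserve it lets later passes reuse
// the cached result instead of rebuilding it.
class ExamplePass : public llvm::MachineFunctionPass {
  static char ID;

public:
  ExamplePass() : llvm::MachineFunctionPass(ID) {}

  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::MachineDominatorTree>();
    AU.addPreserved<llvm::MachineDominatorTree>();
    AU.setPreservesCFG();
    llvm::MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(llvm::MachineFunction &) override {
    return false; // This sketch changes nothing.
  }
};
} // End anonymous namespace

char ExamplePass::ID = 0;
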
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 587ea63..935aad4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -29,10 +29,114 @@ void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
- HasSpilledVGPRs(false),
+ ScratchRSrcReg(AMDGPU::NoRegister),
+ ScratchWaveOffsetReg(AMDGPU::NoRegister),
+ PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister),
+ DispatchPtrUserSGPR(AMDGPU::NoRegister),
+ QueuePtrUserSGPR(AMDGPU::NoRegister),
+ KernargSegmentPtrUserSGPR(AMDGPU::NoRegister),
+ DispatchIDUserSGPR(AMDGPU::NoRegister),
+ FlatScratchInitUserSGPR(AMDGPU::NoRegister),
+ PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister),
+ GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister),
+ GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister),
+ GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister),
+ WorkGroupIDXSystemSGPR(AMDGPU::NoRegister),
+ WorkGroupIDYSystemSGPR(AMDGPU::NoRegister),
+ WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
+ WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
+ PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
+ LDSWaveSpillSize(0),
PSInputAddr(0),
NumUserSGPRs(0),
- LDSWaveSpillSize(0) { }
+ NumSystemSGPRs(0),
+ HasSpilledSGPRs(false),
+ HasSpilledVGPRs(false),
+ PrivateSegmentBuffer(false),
+ DispatchPtr(false),
+ QueuePtr(false),
+ DispatchID(false),
+ KernargSegmentPtr(false),
+ FlatScratchInit(false),
+ GridWorkgroupCountX(false),
+ GridWorkgroupCountY(false),
+ GridWorkgroupCountZ(false),
+ WorkGroupIDX(true),
+ WorkGroupIDY(false),
+ WorkGroupIDZ(false),
+ WorkGroupInfo(false),
+ PrivateSegmentWaveByteOffset(false),
+ WorkItemIDX(true),
+ WorkItemIDY(false),
+ WorkItemIDZ(false) {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ const Function *F = MF.getFunction();
+
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+
+ if (getShaderType() == ShaderType::COMPUTE)
+ KernargSegmentPtr = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
+
+ bool MaySpill = ST.isVGPRSpillingEnabled(this);
+ bool HasStackObjects = FrameInfo->hasStackObjects();
+
+ if (HasStackObjects || MaySpill)
+ PrivateSegmentWaveByteOffset = true;
+
+ if (ST.isAmdHsaOS()) {
+ if (HasStackObjects || MaySpill)
+ PrivateSegmentBuffer = true;
+
+ if (F->hasFnAttribute("amdgpu-dispatch-ptr"))
+ DispatchPtr = true;
+ }
+
+ // X, XY, and XYZ are the only supported combinations, so make sure Y is
+ // enabled if Z is.
+ if (WorkItemIDZ)
+ WorkItemIDY = true;
+}
+
+unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
+ const SIRegisterInfo &TRI) {
+ PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
+ NumUserSGPRs += 4;
+ return PrivateSegmentBufferUserSGPR;
+}
+
+unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
+ DispatchPtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return DispatchPtrUserSGPR;
+}
+
+unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
+ QueuePtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return QueuePtrUserSGPR;
+}
+
+unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
+ KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return KernargSegmentPtrUserSGPR;
+}
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
MachineFunction *MF,
@@ -53,7 +157,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
- MRI.setPhysRegUsed(LaneVGPR);
// Add this register as live-in to all blocks to avoid machine verifier
// complaining about use of an undefined physical register.
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 667da4c..9c528d6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -26,13 +26,83 @@ class MachineRegisterInfo;
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
+ // FIXME: This should be removed and getPreloadedValue moved here.
+ friend struct SIRegisterInfo;
void anchor() override;
unsigned TIDReg;
- bool HasSpilledVGPRs;
+
+ // Registers that may be reserved for spilling purposes. These may be the same
+ // as the input registers.
+ unsigned ScratchRSrcReg;
+ unsigned ScratchWaveOffsetReg;
+
+ // Input registers setup for the HSA ABI.
+ // User SGPRs in allocation order.
+ unsigned PrivateSegmentBufferUserSGPR;
+ unsigned DispatchPtrUserSGPR;
+ unsigned QueuePtrUserSGPR;
+ unsigned KernargSegmentPtrUserSGPR;
+ unsigned DispatchIDUserSGPR;
+ unsigned FlatScratchInitUserSGPR;
+ unsigned PrivateSegmentSizeUserSGPR;
+ unsigned GridWorkGroupCountXUserSGPR;
+ unsigned GridWorkGroupCountYUserSGPR;
+ unsigned GridWorkGroupCountZUserSGPR;
+
+ // System SGPRs in allocation order.
+ unsigned WorkGroupIDXSystemSGPR;
+ unsigned WorkGroupIDYSystemSGPR;
+ unsigned WorkGroupIDZSystemSGPR;
+ unsigned WorkGroupInfoSystemSGPR;
+ unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
public:
+ // FIXME: Make private
+ unsigned LDSWaveSpillSize;
+ unsigned PSInputAddr;
+ std::map<unsigned, unsigned> LaneVGPRs;
+ unsigned ScratchOffsetReg;
+ unsigned NumUserSGPRs;
+ unsigned NumSystemSGPRs;
+
+private:
+ bool HasSpilledSGPRs;
+ bool HasSpilledVGPRs;
+
+ // Feature bits required for inputs passed in user SGPRs.
+ bool PrivateSegmentBuffer : 1;
+ bool DispatchPtr : 1;
+ bool QueuePtr : 1;
+ bool DispatchID : 1;
+ bool KernargSegmentPtr : 1;
+ bool FlatScratchInit : 1;
+ bool GridWorkgroupCountX : 1;
+ bool GridWorkgroupCountY : 1;
+ bool GridWorkgroupCountZ : 1;
+
+ // Feature bits required for inputs passed in system SGPRs.
+ bool WorkGroupIDX : 1; // Always initialized.
+ bool WorkGroupIDY : 1;
+ bool WorkGroupIDZ : 1;
+ bool WorkGroupInfo : 1;
+ bool PrivateSegmentWaveByteOffset : 1;
+
+ bool WorkItemIDX : 1; // Always initialized.
+ bool WorkItemIDY : 1;
+ bool WorkItemIDZ : 1;
+
+ MCPhysReg getNextUserSGPR() const {
+ assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
+ return AMDGPU::SGPR0 + NumUserSGPRs;
+ }
+
+ MCPhysReg getNextSystemSGPR() const {
+ return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
+ }
+
+public:
struct SpilledReg {
unsigned VGPR;
int Lane;
@@ -46,16 +116,162 @@ public:
SIMachineFunctionInfo(const MachineFunction &MF);
SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
unsigned SubIdx);
- unsigned PSInputAddr;
- unsigned NumUserSGPRs;
- std::map<unsigned, unsigned> LaneVGPRs;
- unsigned LDSWaveSpillSize;
- unsigned ScratchOffsetReg;
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
- bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
- void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
+
+ // Add user SGPRs.
+ unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
+ unsigned addDispatchPtr(const SIRegisterInfo &TRI);
+ unsigned addQueuePtr(const SIRegisterInfo &TRI);
+ unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
+
+ // Add system SGPRs.
+ unsigned addWorkGroupIDX() {
+ WorkGroupIDXSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupIDXSystemSGPR;
+ }
+
+ unsigned addWorkGroupIDY() {
+ WorkGroupIDYSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupIDYSystemSGPR;
+ }
+
+ unsigned addWorkGroupIDZ() {
+ WorkGroupIDZSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupIDZSystemSGPR;
+ }
+
+ unsigned addWorkGroupInfo() {
+ WorkGroupInfoSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return WorkGroupInfoSystemSGPR;
+ }
+
+ unsigned addPrivateSegmentWaveByteOffset() {
+ PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR();
+ NumSystemSGPRs += 1;
+ return PrivateSegmentWaveByteOffsetSystemSGPR;
+ }
+
+ bool hasPrivateSegmentBuffer() const {
+ return PrivateSegmentBuffer;
+ }
+
+ bool hasDispatchPtr() const {
+ return DispatchPtr;
+ }
+
+ bool hasQueuePtr() const {
+ return QueuePtr;
+ }
+
+ bool hasDispatchID() const {
+ return DispatchID;
+ }
+
+ bool hasKernargSegmentPtr() const {
+ return KernargSegmentPtr;
+ }
+
+ bool hasFlatScratchInit() const {
+ return FlatScratchInit;
+ }
+
+ bool hasGridWorkgroupCountX() const {
+ return GridWorkgroupCountX;
+ }
+
+ bool hasGridWorkgroupCountY() const {
+ return GridWorkgroupCountY;
+ }
+
+ bool hasGridWorkgroupCountZ() const {
+ return GridWorkgroupCountZ;
+ }
+
+ bool hasWorkGroupIDX() const {
+ return WorkGroupIDX;
+ }
+
+ bool hasWorkGroupIDY() const {
+ return WorkGroupIDY;
+ }
+
+ bool hasWorkGroupIDZ() const {
+ return WorkGroupIDZ;
+ }
+
+ bool hasWorkGroupInfo() const {
+ return WorkGroupInfo;
+ }
+
+ bool hasPrivateSegmentWaveByteOffset() const {
+ return PrivateSegmentWaveByteOffset;
+ }
+
+ bool hasWorkItemIDX() const {
+ return WorkItemIDX;
+ }
+
+ bool hasWorkItemIDY() const {
+ return WorkItemIDY;
+ }
+
+ bool hasWorkItemIDZ() const {
+ return WorkItemIDZ;
+ }
+
+ unsigned getNumUserSGPRs() const {
+ return NumUserSGPRs;
+ }
+
+ unsigned getNumPreloadedSGPRs() const {
+ return NumUserSGPRs + NumSystemSGPRs;
+ }
+
+ unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
+ return PrivateSegmentWaveByteOffsetSystemSGPR;
+ }
+
+ /// \brief Returns the physical register reserved for use as the resource
+ /// descriptor for scratch accesses.
+ unsigned getScratchRSrcReg() const {
+ return ScratchRSrcReg;
+ }
+
+ void setScratchRSrcReg(unsigned Reg) {
+ assert(Reg != AMDGPU::NoRegister && "Should never be unset");
+ ScratchRSrcReg = Reg;
+ }
+
+ unsigned getScratchWaveOffsetReg() const {
+ return ScratchWaveOffsetReg;
+ }
+
+ void setScratchWaveOffsetReg(unsigned Reg) {
+ assert(Reg != AMDGPU::NoRegister && "Should never be unset");
+ ScratchWaveOffsetReg = Reg;
+ }
+
+ bool hasSpilledSGPRs() const {
+ return HasSpilledSGPRs;
+ }
+
+ void setHasSpilledSGPRs(bool Spill = true) {
+ HasSpilledSGPRs = Spill;
+ }
+
+ bool hasSpilledVGPRs() const {
+ return HasSpilledVGPRs;
+ }
+
+ void setHasSpilledVGPRs(bool Spill = true) {
+ HasSpilledVGPRs = Spill;
+ }
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
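
The add* helpers above hand out SGPRs strictly in order: each user-SGPR setter bumps NumUserSGPRs (by 4 for the 128-bit private segment buffer, by 2 for each 64-bit pointer), and system SGPRs are then appended one at a time starting at SGPR0 + NumUserSGPRs, as getNextUserSGPR/getNextSystemSGPR show. A standalone sketch of the resulting layout for a kernel that gets the private segment buffer, the kernarg segment pointer, and workgroup ID X (the call sequence is only an example; the real helpers are driven by argument lowering):

#include <cstdio>

int main() {
  unsigned NumUserSGPRs = 0, NumSystemSGPRs = 0;

  unsigned PrivateSegmentBufferSGPR = NumUserSGPRs; // s[0:3], 128-bit resource
  NumUserSGPRs += 4;

  unsigned KernargSegmentPtrSGPR = NumUserSGPRs;    // s[4:5], 64-bit pointer
  NumUserSGPRs += 2;

  // System SGPRs always follow every user SGPR (see the assert in
  // getNextUserSGPR).
  unsigned WorkGroupIDXSGPR = NumUserSGPRs + NumSystemSGPRs; // s6
  NumSystemSGPRs += 1;

  std::printf("buffer=s[%u:%u] kernarg=s[%u:%u] tgid.x=s%u\n",
              PrivateSegmentBufferSGPR, PrivateSegmentBufferSGPR + 3,
              KernargSegmentPtrSGPR, KernargSegmentPtrSGPR + 1,
              WorkGroupIDXSGPR);
  return 0;
}
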
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
deleted file mode 100644
index 2cd600d..0000000
--- a/contrib/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-//===-- SIPrepareScratchRegs.cpp - Use predicates for control flow --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// This pass loads scratch pointer and scratch offset into a register or a
-/// frame index which can be used anywhere in the program. These values will
-/// be used for spilling VGPRs.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIPrepareScratchRegs : public MachineFunctionPass {
-
-private:
- static char ID;
-
-public:
- SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI prepare scratch registers";
- }
-
-};
-
-} // End anonymous namespace
-
-char SIPrepareScratchRegs::ID = 0;
-
-FunctionPass *llvm::createSIPrepareScratchRegs() {
- return new SIPrepareScratchRegs();
-}
-
-bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- MachineBasicBlock *Entry = MF.begin();
- MachineBasicBlock::iterator I = Entry->begin();
- DebugLoc DL = I->getDebugLoc();
-
- // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
- // run this pass.
- if (!MFI->hasSpilledVGPRs())
- return false;
-
- unsigned ScratchPtrPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
- unsigned ScratchOffsetPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
-
- if (!Entry->isLiveIn(ScratchPtrPreloadReg))
- Entry->addLiveIn(ScratchPtrPreloadReg);
-
- if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
- Entry->addLiveIn(ScratchOffsetPreloadReg);
-
- // Load the scratch offset.
- unsigned ScratchOffsetReg =
- TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
- int ScratchOffsetFI = -1;
-
- if (ScratchOffsetReg != AMDGPU::NoRegister) {
- // Found an SGPR to use
- MRI.setPhysRegUsed(ScratchOffsetReg);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
- .addReg(ScratchOffsetPreloadReg);
- } else {
- // No SGPR is available, we must spill.
- ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
- .addReg(ScratchOffsetPreloadReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- }
-
-
- // Now that we have the scratch pointer and offset values, we need to
- // add them to all the SI_SPILL_V* instructions.
-
- RegScavenger RS;
- unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
- RS.addScavengingFrameIndex(ScratchRsrcFI);
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- // Add the scratch offset reg as a live-in so that the register scavenger
- // doesn't re-use it.
- if (!MBB.isLiveIn(ScratchOffsetReg) &&
- ScratchOffsetReg != AMDGPU::NoRegister)
- MBB.addLiveIn(ScratchOffsetReg);
- RS.enterBasicBlock(&MBB);
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- RS.forward(I);
- DebugLoc DL = MI.getDebugLoc();
- if (!TII->isVGPRSpill(MI.getOpcode()))
- continue;
-
- // Scratch resource
- unsigned ScratchRsrcReg =
- RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
-
- uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
- unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
- unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
- .addImm(Rsrc23 & 0xffffffff)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
- .addImm(Rsrc23 >> 32)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- // Scratch Offset
- if (ScratchOffsetReg == AMDGPU::NoRegister) {
- ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
- ScratchOffsetReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
- MBB.addLiveIn(ScratchOffsetReg);
- }
-
- if (ScratchRsrcReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF.getFunction()->getContext();
- Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchRsrcReg = AMDGPU::SGPR0;
- ScratchOffsetReg = AMDGPU::SGPR0;
- }
- MI.getOperand(2).setReg(ScratchRsrcReg);
- MI.getOperand(2).setIsKill(true);
- MI.getOperand(2).setIsUndef(false);
- MI.getOperand(3).setReg(ScratchOffsetReg);
- MI.getOperand(3).setIsUndef(false);
- MI.getOperand(3).setIsKill(false);
- MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
- }
- }
- return true;
-}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index e9e8412..3cdffef 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "SIRegisterInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -33,6 +32,40 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co
Reserved.set(*R);
}
+unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ if (ST.hasSGPRInitBug()) {
+ unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
+ unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
+ return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
+ }
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the
+ // next sgpr128 down.
+ return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
+ }
+
+ return AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99;
+}
+
+unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
+ const MachineFunction &MF) const {
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ if (ST.hasSGPRInitBug()) {
+ unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
+ return AMDGPU::SGPR_32RegClass.getRegister(Idx);
+ }
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // Next register before reservations for flat_scr and vcc.
+ return AMDGPU::SGPR97;
+ }
+
+ return AMDGPU::SGPR95;
+}
+
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
@@ -42,13 +75,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
- // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs
- reserveRegisterTuples(Reserved, AMDGPU::VGPR254);
- reserveRegisterTuples(Reserved, AMDGPU::VGPR255);
+ // Reserve the last 2 registers so we will always have at least 2 more that
+ // will physically contain VCC.
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103);
+
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation
+ // for VCC/FLAT_SCR.
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
+ }
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
- if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
+ if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
// Assume XNACK_MASK is unused.
@@ -60,34 +102,57 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ if (ScratchWaveOffsetReg != AMDGPU::NoRegister) {
+ // Reserve 1 SGPR for scratch wave offset in case we need to spill.
+ reserveRegisterTuples(Reserved, ScratchWaveOffsetReg);
+ }
+
+ unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
+ if (ScratchRSrcReg != AMDGPU::NoRegister) {
+ // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
+ // to spill.
+ // TODO: May need to reserve a VGPR if doing LDS spilling.
+ reserveRegisterTuples(Reserved, ScratchRSrcReg);
+ assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
+ }
+
return Reserved;
}
unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
unsigned Idx) const {
-
const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
// FIXME: We should adjust the max number of waves based on LDS size.
unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
STI.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
+ unsigned VSLimit = SGPRLimit + VGPRLimit;
+
for (regclass_iterator I = regclass_begin(), E = regclass_end();
I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
- unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+ unsigned NumSubRegs = std::max((int)RC->getSize() / 4, 1);
unsigned Limit;
- if (isSGPRClass(*I)) {
+ if (isPseudoRegClass(RC)) {
+ // FIXME: This is a hack. We should never be considering the pressure of
+ // these since no virtual register should ever have this class.
+ Limit = VSLimit;
+ } else if (isSGPRClass(RC)) {
Limit = SGPRLimit / NumSubRegs;
} else {
Limit = VGPRLimit / NumSubRegs;
}
- const int *Sets = getRegClassPressureSets(*I);
+ const int *Sets = getRegClassPressureSets(RC);
assert(Sets);
for (unsigned i = 0; Sets[i] != -1; ++i) {
- if (Sets[i] == (int)Idx)
+ if (Sets[i] == (int)Idx)
return Limit;
}
}
@@ -174,17 +239,17 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned SubReg = NumSubRegs > 1 ?
getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
Value;
- bool IsKill = (i == e - 1);
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(SubReg, getDefRegState(IsLoad))
- .addReg(ScratchRsrcReg, getKillRegState(IsKill))
- .addReg(SOffset)
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+ .addReg(SubReg, getDefRegState(IsLoad))
+ .addReg(ScratchRsrcReg)
+ .addReg(SOffset)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addReg(Value, RegState::Implicit | getDefRegState(IsLoad))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
}
}
@@ -228,6 +293,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg)
.addImm(Spill.Lane);
+ // FIXME: Since this spills to another register instead of an actual
+ // frame index, we should delete the frame index when all references to
+ // it are fixed.
}
MI->eraseFromParent();
break;
@@ -263,16 +331,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// TODO: only do this when it is needed
switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
- // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
- TII->insertNOPs(MI, 3);
+ // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
+ // ("S_NOP 3") on SI
+ TII->insertWaitStates(MI, 4);
break;
case AMDGPUSubtarget::SEA_ISLANDS:
break;
default: // VOLCANIC_ISLANDS and later
- // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
- // and later. This also applies to VALUs which write VCC, but we're
- // unlikely to see VMEM use VCC.
- TII->insertNOPs(MI, 4);
+ // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
+ // ("S_NOP 4") on VI and later. This also applies to VALUs which write
+ // VCC, but we're unlikely to see VMEM use VCC.
+ TII->insertWaitStates(MI, 5);
}
MI->eraseFromParent();
@@ -322,22 +391,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
}
-const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
- MVT VT) const {
- switch(VT.SimpleTy) {
- default:
- case MVT::i32: return &AMDGPU::VGPR_32RegClass;
- }
-}
-
unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff;
}
+// FIXME: This is very slow. It might be worth creating a map from physreg to
+// register class.
const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
- static const TargetRegisterClass *BaseClasses[] = {
+ static const TargetRegisterClass *const BaseClasses[] = {
&AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::VReg_64RegClass,
@@ -359,33 +422,45 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
return nullptr;
}
+// TODO: It might be helpful to have some target specific flags in
+// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
- getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+ switch (RC->getSize()) {
+ case 4:
+ return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
+ case 8:
+ return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
+ case 12:
+ return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
+ case 16:
+ return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+ case 32:
+ return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
+ case 64:
+ return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
+ default:
+ llvm_unreachable("Invalid register class size");
+ }
}
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const {
- if (hasVGPRs(SRC)) {
- return SRC;
- } else if (SRC == &AMDGPU::SCCRegRegClass) {
- return &AMDGPU::VCCRegRegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
- return &AMDGPU::VGPR_32RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
- return &AMDGPU::VReg_64RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
- return &AMDGPU::VReg_128RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
- return &AMDGPU::VReg_256RegClass;
- } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
- return &AMDGPU::VReg_512RegClass;
- }
- return nullptr;
+ switch (SRC->getSize()) {
+ case 4:
+ return &AMDGPU::VGPR_32RegClass;
+ case 8:
+ return &AMDGPU::VReg_64RegClass;
+ case 12:
+ return &AMDGPU::VReg_96RegClass;
+ case 16:
+ return &AMDGPU::VReg_128RegClass;
+ case 32:
+ return &AMDGPU::VReg_256RegClass;
+ case 64:
+ return &AMDGPU::VReg_512RegClass;
+ default:
+ llvm_unreachable("Invalid register class size");
+ }
}
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
@@ -402,6 +477,30 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
}
}
+bool SIRegisterInfo::shouldRewriteCopySrc(
+ const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const {
+ // We want to prefer the smallest register class possible, so we don't want to
+ // stop and rewrite on anything that looks like a subregister
+ // extract. Operations mostly don't care about the super register class, so we
+ // only want to stop on the most basic of copies between the same register
+ // class.
+ //
+ // e.g. if we have something like
+ // vreg0 = ...
+ // vreg1 = ...
+ // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
+ // vreg3 = COPY vreg2, sub0
+ //
+ // We want to look through the COPY to find:
+ // => vreg3 = COPY vreg0
+
+ // Plain copy.
+ return getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
+
unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
const TargetRegisterClass *SubRC,
unsigned Channel) const {
@@ -462,30 +561,47 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
return OpType == AMDGPU::OPERAND_REG_INLINE_C;
}
+// FIXME: Most of these are flexible with HSA and we don't need to reserve them
+// as input registers if unused. Whether the dispatch ptr is necessary should be
+// easy to detect from used intrinsics. Scratch setup is harder to know.
unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ (void)ST;
switch (Value) {
- case SIRegisterInfo::TGID_X:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
- case SIRegisterInfo::TGID_Y:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
- case SIRegisterInfo::TGID_Z:
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
- case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
- if (MFI->getShaderType() != ShaderType::COMPUTE)
- return MFI->ScratchOffsetReg;
- return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
- case SIRegisterInfo::SCRATCH_PTR:
- return AMDGPU::SGPR2_SGPR3;
- case SIRegisterInfo::INPUT_PTR:
- return AMDGPU::SGPR0_SGPR1;
- case SIRegisterInfo::TIDIG_X:
+ case SIRegisterInfo::WORKGROUP_ID_X:
+ assert(MFI->hasWorkGroupIDX());
+ return MFI->WorkGroupIDXSystemSGPR;
+ case SIRegisterInfo::WORKGROUP_ID_Y:
+ assert(MFI->hasWorkGroupIDY());
+ return MFI->WorkGroupIDYSystemSGPR;
+ case SIRegisterInfo::WORKGROUP_ID_Z:
+ assert(MFI->hasWorkGroupIDZ());
+ return MFI->WorkGroupIDZSystemSGPR;
+ case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
+ return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
+ case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
+ assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations");
+ assert(MFI->hasPrivateSegmentBuffer());
+ return MFI->PrivateSegmentBufferUserSGPR;
+ case SIRegisterInfo::KERNARG_SEGMENT_PTR:
+ assert(MFI->hasKernargSegmentPtr());
+ return MFI->KernargSegmentPtrUserSGPR;
+ case SIRegisterInfo::DISPATCH_PTR:
+ assert(MFI->hasDispatchPtr());
+ return MFI->DispatchPtrUserSGPR;
+ case SIRegisterInfo::QUEUE_PTR:
+ llvm_unreachable("not implemented");
+ case SIRegisterInfo::WORKITEM_ID_X:
+ assert(MFI->hasWorkItemIDX());
return AMDGPU::VGPR0;
- case SIRegisterInfo::TIDIG_Y:
+ case SIRegisterInfo::WORKITEM_ID_Y:
+ assert(MFI->hasWorkItemIDY());
return AMDGPU::VGPR1;
- case SIRegisterInfo::TIDIG_Z:
+ case SIRegisterInfo::WORKITEM_ID_Z:
+ assert(MFI->hasWorkItemIDZ());
return AMDGPU::VGPR2;
}
llvm_unreachable("unexpected preloaded value type");
@@ -496,12 +612,9 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
// AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const {
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- if (!MRI.isPhysRegUsed(*I))
- return *I;
- }
+ for (unsigned Reg : *RC)
+ if (!MRI.isPhysRegUsed(Reg))
+ return Reg;
return AMDGPU::NoRegister;
}
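
The hazard handling above now counts wait states instead of s_nop instructions: per the updated comments, "S_NOP 3" covers the 4 wait states needed on SI and "S_NOP 4" the 5 needed on VI, i.e. s_nop N provides N+1 wait states. A tiny sketch of that conversion, assuming a single s_nop is enough (true for the counts used here; the encodable operand range is not taken from this patch):

#include <cassert>

// Map a wait-state count to the operand of a single s_nop, using the
// N -> N+1 relationship implied by the comments above.
static unsigned sNopOperandForWaitStates(unsigned WaitStates) {
  assert(WaitStates >= 1 && "need at least one wait state");
  return WaitStates - 1; // s_nop (W-1) yields W wait states
}
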
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 7da6de2..1795237 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -18,6 +18,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
namespace llvm {
@@ -29,6 +30,15 @@ private:
public:
SIRegisterInfo();
+ /// Return the end register initially reserved for the scratch buffer in case
+ /// spilling is needed.
+ unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
+
+ /// Return the end register initially reserved for the scratch wave offset in
+ /// case spilling is needed.
+ unsigned reservedPrivateSegmentWaveByteOffsetReg(
+ const MachineFunction &MF) const;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
unsigned getRegPressureSetLimit(const MachineFunction &MF,
@@ -40,10 +50,6 @@ public:
unsigned FIOperandNum,
RegScavenger *RS) const override;
- /// \brief get the register class of the specified type to use in the
- /// CFGStructurizer
- const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
-
unsigned getHWRegIndex(unsigned Reg) const override;
/// \brief Return the 'base' register class for this register.
@@ -52,23 +58,30 @@ public:
/// \returns true if this class contains only SGPR registers
bool isSGPRClass(const TargetRegisterClass *RC) const {
- if (!RC)
- return false;
-
return !hasVGPRs(RC);
}
/// \returns true if this class ID contains only SGPR registers
bool isSGPRClassID(unsigned RCID) const {
- if (static_cast<int>(RCID) == -1)
- return false;
-
return isSGPRClass(getRegClass(RCID));
}
+ bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ return isSGPRClass(MRI.getRegClass(Reg));
+ return getPhysRegClass(Reg);
+ }
+
/// \returns true if this class contains VGPR registers.
bool hasVGPRs(const TargetRegisterClass *RC) const;
+ /// returns true if this is a pseudoregister class combination of VGPRs and
+ /// SGPRs for operand modeling. FIXME: We should set isAllocatable = 0 on
+ /// them.
+ static bool isPseudoRegClass(const TargetRegisterClass *RC) {
+ return RC == &AMDGPU::VS_32RegClass || RC == &AMDGPU::VS_64RegClass;
+ }
+
/// \returns A VGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const;
@@ -79,6 +92,11 @@ public:
const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
unsigned SubIdx) const;
+ bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+ unsigned DefSubReg,
+ const TargetRegisterClass *SrcRC,
+ unsigned SrcSubReg) const override;
+
/// \p Channel This is the register channel (e.g. a value from 0-16), not the
/// SubReg index.
/// \returns The sub-register of Reg that is in Channel.
@@ -91,19 +109,25 @@ public:
/// \returns True if operands defined with this operand type can accept
/// an inline constant. i.e. An integer value in the range (-16, 64) or
- /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
+ /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
bool opCanUseInlineConstant(unsigned OpType) const;
enum PreloadedValue {
- TGID_X,
- TGID_Y,
- TGID_Z,
- SCRATCH_WAVE_OFFSET,
- SCRATCH_PTR,
- INPUT_PTR,
- TIDIG_X,
- TIDIG_Y,
- TIDIG_Z
+ // SGPRS:
+ PRIVATE_SEGMENT_BUFFER = 0,
+ DISPATCH_PTR = 1,
+ QUEUE_PTR = 2,
+ KERNARG_SEGMENT_PTR = 3,
+ WORKGROUP_ID_X = 10,
+ WORKGROUP_ID_Y = 11,
+ WORKGROUP_ID_Z = 12,
+ PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
+
+ // VGPRS:
+ FIRST_VGPR_VALUE = 15,
+ WORKITEM_ID_X = FIRST_VGPR_VALUE,
+ WORKITEM_ID_Y = 16,
+ WORKITEM_ID_Z = 17
};
/// \brief Returns the physical register that \p Value is stored in.
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 2a9017f..bfaf937 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -10,10 +10,13 @@
//===----------------------------------------------------------------------===//
// Declarations that describe the SI registers
//===----------------------------------------------------------------------===//
-
-class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
+ DwarfRegNum<[!cast<int>(HWEncoding)]> {
let Namespace = "AMDGPU";
- let HWEncoding = encoding;
+
+ // This is not yet the complete register encoding. An additional
+ // bit is set for VGPRs.
+ let HWEncoding = regIdx;
}
// Special Registers
@@ -21,7 +24,8 @@ def VCC_LO : SIReg<"vcc_lo", 106>;
def VCC_HI : SIReg<"vcc_hi", 107>;
// VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+ DwarfRegAlias<VCC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 106;
@@ -30,7 +34,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
def EXEC_LO : SIReg<"exec_lo", 126>;
def EXEC_HI : SIReg<"exec_hi", 127>;
-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
+def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+ DwarfRegAlias<EXEC_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 126;
@@ -39,18 +44,29 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;
-def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
-def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
+multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
+ def _ci : SIReg<n, ci_e>;
+ def _vi : SIReg<n, vi_e>;
+ def "" : SIReg<"", 0>;
+}
-// Pair to indicate location of scratch space for flat accesses.
-def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
+class FlatReg <Register lo, Register hi, bits<16> encoding> :
+ RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+ DwarfRegAlias<lo> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
- let HWEncoding = 104;
+ let HWEncoding = encoding;
}
+defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
+defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.
+
+def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
+def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
+def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
+
// SGPR registers
-foreach Index = 0-101 in {
+foreach Index = 0-103 in {
def SGPR#Index : SIReg <"SGPR"#Index, Index>;
}
@@ -65,25 +81,27 @@ foreach Index = 0-255 in {
// Groupings using register classes and tuples
//===----------------------------------------------------------------------===//
+// TODO: Do we need to set DwarfRegAlias on register tuples?
+
// SGPR 32-bit registers
def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add (sequence "SGPR%u", 0, 101))>;
+ (add (sequence "SGPR%u", 0, 103))>;
// SGPR 64-bit registers
def SGPR_64Regs : RegisterTuples<[sub0, sub1],
- [(add (decimate (trunc SGPR_32, 101), 2)),
+ [(add (decimate SGPR_32, 2)),
(add (decimate (shl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
- [(add (decimate (trunc SGPR_32, 99), 4)),
+ [(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4))]>;
// SGPR 256-bit registers
def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
- [(add (decimate (trunc SGPR_32, 95), 4)),
+ [(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
@@ -95,7 +113,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
// SGPR 512-bit registers
def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
- [(add (decimate (trunc SGPR_32, 87), 4)),
+ [(add (decimate SGPR_32, 4)),
(add (decimate (shl SGPR_32, 1), 4)),
(add (decimate (shl SGPR_32, 2), 4)),
(add (decimate (shl SGPR_32, 3), 4)),
@@ -174,44 +192,57 @@ class RegImmMatcher<string name> : AsmOperandClass {
let RenderMethod = "addRegOrImmOperands";
}
-// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
- let CopyCost = -1; // Theoretically it is possible to read from SCC,
- // but it should never be necessary.
-}
-
-def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-
// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
- (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
+ (add SGPR_64, VCC, EXEC, FLAT_SCR)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
+ // Requires 2 s_mov_b64 to copy
+ let CopyCost = 2;
+}
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add SGPR_256)> {
+ // Requires 4 s_mov_b64 to copy
+ let CopyCost = 4;
+}
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+ // Requires 8 s_mov_b64 to copy
+ let CopyCost = 8;
+}
// Register class for all vector registers (VGPRs + Interpolation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
+ // Requires 2 v_mov_b32 to copy
+ let CopyCost = 2;
+}
-def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
let Size = 96;
+
+ // Requires 3 v_mov_b32 to copy
+ let CopyCost = 3;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+ // Requires 4 v_mov_b32 to copy
+ let CopyCost = 4;
+}
-def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
+def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add VGPR_256)> {
+ let CopyCost = 8;
+}
-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+ let CopyCost = 16;
+}
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
let Size = 32;
@@ -253,7 +284,9 @@ def SCSrc_32 : RegInlineOperand<SReg_32> {
def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+ let CopyCost = 2;
+}
def VSrc_32 : RegisterOperand<VS_32> {
let OperandNamespace = "AMDGPU";
@@ -282,3 +315,13 @@ def VCSrc_64 : RegisterOperand<VS_64> {
let OperandType = "OPERAND_REG_INLINE_C";
let ParserMatchClass = RegImmMatcher<"VCSrc64">;
}
+
+//===----------------------------------------------------------------------===//
+// SCSrc_* Operands with an SGPR or an inline constant
+//===----------------------------------------------------------------------===//
+
+def SCSrc_64 : RegisterOperand<SReg_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"SCSrc64">;
+}
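
The CopyCost values introduced above all follow one convention: an SGPR tuple is copied with 64-bit s_mov_b64 moves and a VGPR tuple with 32-bit v_mov_b32 moves, so the cost is the class width divided by the move width (SReg_128 -> 2, SReg_256 -> 4, SReg_512 -> 8; VReg_64 -> 2, VReg_96 -> 3, up to VReg_512 -> 16). A one-line sketch of that rule (helper names are illustrative):

// Copy cost = register-class width / width moved per copy instruction.
static unsigned sgprCopyCost(unsigned WidthInBits) { return WidthInBits / 64; }
static unsigned vgprCopyCost(unsigned WidthInBits) { return WidthInBits / 32; }
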
diff --git a/contrib/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
index 9b1f676..cd77e51 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -17,16 +17,28 @@ def WriteLDS : SchedWrite;
def WriteSALU : SchedWrite;
def WriteSMEM : SchedWrite;
def WriteVMEM : SchedWrite;
+def WriteBarrier : SchedWrite;
// Vector ALU instructions
def Write32Bit : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
+def WriteFullOrQuarterRate32 : SchedWrite;
def WriteFloatFMA : SchedWrite;
-def WriteDouble : SchedWrite;
+// Slow quarter rate f64 instruction.
+def WriteDouble : SchedWrite;
+
+// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;
+// Half rate 64-bit instructions.
+def Write64Bit : SchedWrite;
+
+// FIXME: Should there be a class for instructions which are VALU
+// instructions and have VALU rates, but write to the SALU (i.e. VOPC
+// instructions)
+
def SIFullSpeedModel : SchedMachineModel;
def SIQuarterSpeedModel : SchedMachineModel;
@@ -53,7 +65,7 @@ class HWVALUWriteRes<SchedWrite write, int latency> :
// The latency numbers are taken from AMD Accelerated Parallel Processing
-// guide. They may not be acurate.
+// guide. They may not be accurate.
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {
@@ -64,8 +76,10 @@ multiclass SICommonWriteRes {
def : HWWriteRes<WriteSALU, [HWSALU], 1>;
def : HWWriteRes<WriteSMEM, [HWLGKM], 10>; // XXX: Guessed ???
def : HWWriteRes<WriteVMEM, [HWVMEM], 450>; // 300 - 600
+ def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???
def : HWVALUWriteRes<Write32Bit, 1>;
+ def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
}
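
Editor's note: the new Write64Bit entry fits the "1 / (operations / cycle) / 4" formula in the comment above, assuming the usual 64-wide wavefront executing on a 16-lane GCN SIMD: a full-rate VALU op takes 4 cycles per wavefront, a half-rate op 8, a quarter-rate op 16, and the table stores those numbers divided by 4. A quick standalone check of that arithmetic, purely illustrative.

#include <cstdio>

// latency_entry = (wavefront_size / ops_per_cycle) / 4, matching the
// SICommonWriteRes comment above.
static int writeResLatency(int wavefrontSize, int opsPerCycle) {
  return (wavefrontSize / opsPerCycle) / 4;
}

int main() {
  const int Wave = 64;
  printf("Write32Bit         -> %d\n", writeResLatency(Wave, 16)); // 1
  printf("Write64Bit         -> %d\n", writeResLatency(Wave, 8));  // 2
  printf("WriteQuarterRate32 -> %d\n", writeResLatency(Wave, 4));  // 4
  return 0;
}
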
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 5d00bdd..4f0913f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -141,8 +141,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
if (!MRI.isSSA())
return;
- assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
- TII->isVOPC(MI.getOpcode()));
+ assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
const SIRegisterInfo &TRI = TII->getRegisterInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
@@ -187,6 +186,21 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
}
+// Copy MachineOperand with all flags except setting it as implicit.
+static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) {
+ assert(!Orig.isImplicit());
+ return MachineOperand::CreateReg(Orig.getReg(),
+ Orig.isDef(),
+ true,
+ Orig.isKill(),
+ Orig.isDead(),
+ Orig.isUndef(),
+ Orig.isEarlyClobber(),
+ Orig.getSubReg(),
+ Orig.isDebug(),
+ Orig.isInternalRead());
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIInstrInfo *TII =
@@ -236,14 +250,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (TII->isVOPC(Op32)) {
unsigned DstReg = MI.getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- // VOPC instructions can only write to the VCC register. We can't
- // force them to use VCC here, because the register allocator has
- // trouble with sequences like this, which cause the allocator to run
- // out of registers if vreg0 and vreg1 belong to the VCCReg register
- // class:
- // vreg0 = VOPC;
- // vreg1 = VOPC;
- // S_AND_B64 vreg0, vreg1
+ // VOPC instructions can only write to the VCC register. We can't
+ // force them to use VCC here, because this is only one register and
+ // cannot deal with sequences which would require multiple copies of
+ // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
//
// So, instead of forcing the instruction to write to VCC, we provide
 // a hint to the register allocator to use VCC and then we will run
@@ -272,13 +282,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
// We can shrink this instruction
- DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
+ DEBUG(dbgs() << "Shrinking " << MI);
MachineInstrBuilder Inst32 =
BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
- // dst
- Inst32.addOperand(MI.getOperand(0));
+ // Add the dst operand if the 32-bit encoding also has an explicit $dst.
+ // For VOPC instructions, this is replaced by an implicit def of vcc.
+ int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::dst);
+ if (Op32DstIdx != -1) {
+ // dst
+ Inst32.addOperand(MI.getOperand(0));
+ } else {
+ assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+ "Unexpected case");
+ }
+
Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
@@ -288,9 +307,19 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
Inst32.addOperand(*Src1);
const MachineOperand *Src2 =
- TII->getNamedOperand(MI, AMDGPU::OpName::src2);
- if (Src2)
- Inst32.addOperand(*Src2);
+ TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ if (Src2) {
+ int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+ if (Op32Src2Idx != -1) {
+ Inst32.addOperand(*Src2);
+ } else {
+ // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+ // replaced with an implicit read of vcc.
+ assert(Src2->getReg() == AMDGPU::VCC &&
+ "Unexpected missing register operand");
+ Inst32.addOperand(copyRegOperandAsImplicit(*Src2));
+ }
+ }
++NumInstructionsShrunk;
MI.eraseFromParent();
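
Editor's note on the shrinking changes above: the pass now consults the 32-bit opcode's operand list instead of assuming it mirrors the 64-bit one. If the e32 form has no explicit $dst (VOPC), the result must already be VCC and is carried as an implicit def; if it has no explicit src2 (V_CNDMASK_B32_e32), the carry-in must be VCC and is re-added through copyRegOperandAsImplicit. A standalone sketch of that decision, with made-up operand tables standing in for AMDGPU::getNamedOperandIdx; the operand lists here are illustrative only.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Toy stand-in for getNamedOperandIdx: -1 means the 32-bit encoding has no
// explicit operand of that name, so the value lives in VCC implicitly.
static int namedOperandIdx(const std::vector<std::string> &ops,
                           const std::string &name) {
  auto it = std::find(ops.begin(), ops.end(), name);
  return it == ops.end() ? -1 : int(it - ops.begin());
}

int main() {
  // Hypothetical operand lists for a VOPC e32 form and V_CNDMASK_B32_e32.
  std::vector<std::string> vopc32 = {"src0", "src1"};           // no dst
  std::vector<std::string> cndmask32 = {"dst", "src0", "src1"}; // no src2

  printf("VOPC dst explicit?     %s\n",
         namedOperandIdx(vopc32, "dst") != -1 ? "yes" : "no (implicit vcc def)");
  printf("CNDMASK src2 explicit? %s\n",
         namedOperandIdx(cndmask32, "src2") != -1 ? "yes" : "no (implicit vcc use)");
  return 0;
}
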
diff --git a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
index 591ce85..dbdc76b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -22,6 +22,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -61,14 +62,7 @@ bool SITypeRewriter::doInitialization(Module &M) {
}
bool SITypeRewriter::runOnFunction(Function &F) {
- Attribute A = F.getFnAttribute("ShaderType");
-
- unsigned ShaderType = ShaderType::COMPUTE;
- if (A.isStringAttribute()) {
- StringRef Str = A.getValueAsString();
- Str.getAsInteger(0, ShaderType);
- }
- if (ShaderType == ShaderType::COMPUTE)
+ if (AMDGPU::getShaderType(F) == ShaderType::COMPUTE)
return false;
visit(F);
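
Editor's note: the pass now defers to the shared AMDGPU::getShaderType helper added further down in AMDGPUBaseInfo.cpp, which reads the "ShaderType" string attribute and falls back to COMPUTE, emitting an error if the string is not an integer. A standalone model of that parse; the attribute lookup and the enum values are stubbed out here and are not the real AMDGPU definitions.

#include <cstdio>
#include <cstdlib>
#include <string>

// Stand-in for the AMDGPU ShaderType enum; only COMPUTE matters here.
enum ShaderType { PIXEL, VERTEX, GEOMETRY, COMPUTE };

// Stand-in for Function::getFnAttribute("ShaderType"): an empty string means
// the attribute is absent, so the default (COMPUTE) is returned.
static unsigned getShaderType(const std::string &attr) {
  unsigned Type = COMPUTE;
  if (!attr.empty()) {
    char *end = nullptr;
    unsigned long v = std::strtoul(attr.c_str(), &end, 0);
    if (end && *end == '\0')
      Type = unsigned(v);
    else
      std::fprintf(stderr, "can't parse shader type\n");
  }
  return Type;
}

int main() {
  printf("%u %u %u\n", getShaderType(""), getShaderType("1"),
         getShaderType("bogus"));
  return 0;
}
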
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b76b400..add415e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,12 +7,23 @@
//
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+#undef GET_REGINFO_ENUM
+
namespace llvm {
namespace AMDGPU {
@@ -56,5 +67,91 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.private_segment_alignment = 4;
}
+MCSection *getHSATextSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_AMDGPU_HSA_AGENT |
+ ELF::SHF_AMDGPU_HSA_CODE);
+}
+
+MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::SHF_AMDGPU_HSA_GLOBAL |
+ ELF::SHF_AMDGPU_HSA_AGENT);
+}
+
+MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE |
+ ELF::SHF_AMDGPU_HSA_GLOBAL);
+}
+
+MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
+ return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
+ ELF::SHF_AMDGPU_HSA_AGENT);
+}
+
+bool isGroupSegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}
+
+bool isGlobalSegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+}
+
+bool isReadOnlySegment(const GlobalValue *GV) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+}
+
+static const char ShaderTypeAttribute[] = "ShaderType";
+
+unsigned getShaderType(const Function &F) {
+ Attribute A = F.getFnAttribute(ShaderTypeAttribute);
+ unsigned ShaderType = ShaderType::COMPUTE;
+
+ if (A.isStringAttribute()) {
+ StringRef Str = A.getValueAsString();
+ if (Str.getAsInteger(0, ShaderType)) {
+ LLVMContext &Ctx = F.getContext();
+ Ctx.emitError("can't parse shader type");
+ }
+ }
+ return ShaderType;
+}
+
+bool isSI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
+}
+
+bool isCI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
+}
+
+bool isVI(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
+}
+
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+
+ switch(Reg) {
+ default: break;
+ case AMDGPU::FLAT_SCR:
+ assert(!isSI(STI));
+ return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;
+
+ case AMDGPU::FLAT_SCR_LO:
+ assert(!isSI(STI));
+ return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;
+
+ case AMDGPU::FLAT_SCR_HI:
+ assert(!isSI(STI));
+ return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
+ }
+ return Reg;
+}
+
} // End namespace AMDGPU
} // End namespace llvm
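
Editor's note: getMCReg is the piece the MC layer (assembler/disassembler) needs — FLAT_SCR and its halves are pseudo registers whose concrete encoding differs between CI and VI, so MC code asks this helper for the hardware register once the subtarget is known. A standalone sketch of the same mapping over a toy enum; the real code keys off the generated AMDGPU register and feature enums.

#include <cstdio>

// Toy register numbers standing in for the generated AMDGPU::* enum.
enum Reg { FLAT_SCR, FLAT_SCR_ci, FLAT_SCR_vi, SGPR0 };
enum Gen { SI, CI, VI };

// Map a pseudo register to the hardware register for the given generation,
// mirroring AMDGPU::getMCReg: non-pseudo registers pass through unchanged.
static Reg getMCReg(Reg R, Gen G) {
  if (R == FLAT_SCR) {
    // FLAT_SCR does not exist on SI at all (hence the assert in the real code).
    return G == CI ? FLAT_SCR_ci : FLAT_SCR_vi;
  }
  return R;
}

int main() {
  printf("%d %d %d\n", getMCReg(FLAT_SCR, CI), getMCReg(FLAT_SCR, VI),
         getMCReg(SGPR0, VI));
  return 0;
}
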
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f57028c..19419a2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -15,6 +15,11 @@
namespace llvm {
class FeatureBitset;
+class Function;
+class GlobalValue;
+class MCContext;
+class MCSection;
+class MCSubtargetInfo;
namespace AMDGPU {
@@ -27,6 +32,27 @@ struct IsaVersion {
IsaVersion getIsaVersion(const FeatureBitset &Features);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
+MCSection *getHSATextSection(MCContext &Ctx);
+
+MCSection *getHSADataGlobalAgentSection(MCContext &Ctx);
+
+MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
+
+MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
+
+bool isGroupSegment(const GlobalValue *GV);
+bool isGlobalSegment(const GlobalValue *GV);
+bool isReadOnlySegment(const GlobalValue *GV);
+
+unsigned getShaderType(const Function &F);
+
+bool isSI(const MCSubtargetInfo &STI);
+bool isCI(const MCSubtargetInfo &STI);
+bool isVI(const MCSubtargetInfo &STI);
+
+/// If \p Reg is a pseudo reg, return the correct hardware register given
+/// \p STI otherwise return \p Reg.
+unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
} // end namespace AMDGPU
} // end namespace llvm
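
Editor's note: the three is*Segment predicates classify a GlobalValue purely by its address space, presumably so object emission can route it into the matching HSA section declared above (.hsadata_global_*, .hsarodata_readonly_agent). A standalone sketch of that classification; the address-space tags below are placeholders, the real values come from AMDGPUAS.

#include <cstdio>

// Placeholder address-space tags; the in-tree code uses AMDGPUAS constants.
enum AddressSpace { PRIVATE, GLOBAL, CONSTANT, LOCAL };

static bool isGroupSegment(AddressSpace AS)    { return AS == LOCAL; }
static bool isGlobalSegment(AddressSpace AS)   { return AS == GLOBAL; }
static bool isReadOnlySegment(AddressSpace AS) { return AS == CONSTANT; }

// Pick a section name the way an HSA-aware object writer might; the fallback
// and the "no section" case are assumptions for the sake of the example.
static const char *pickSection(AddressSpace AS) {
  if (isGroupSegment(AS))    return "<group memory, nothing emitted>";
  if (isReadOnlySegment(AS)) return ".hsarodata_readonly_agent";
  if (isGlobalSegment(AS))   return ".hsadata_global_program";
  return ".data";
}

int main() {
  printf("%s\n%s\n", pickSection(GLOBAL), pickSection(CONSTANT));
  return 0;
}
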
diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
index aca4673..20a026a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
@@ -73,8 +73,8 @@ defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
} // End isCommutable = 1
defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
-// Aliases to simplify matching of floating-pint instructions that are VOP2 on
-// SI and VOP3 on VI.
+// Aliases to simplify matching of floating-point instructions that
+// are VOP2 on SI and VOP3 on VI.
class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
name#" $dst, $src0, $src1",
@@ -89,60 +89,15 @@ def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
-} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
-
//===----------------------------------------------------------------------===//
-// SMEM Patterns
+// SMEM Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [isVI] in {
+def S_DCACHE_WB : SMEM_Inval <0x21,
+ "s_dcache_wb", int_amdgcn_s_dcache_wb>;
-// 1. Offset as 20bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
->;
-
-// Patterns for global loads with no offset
-class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 0, 0, 0)
->;
-
-def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
-
-class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (node vt:$data, i64:$addr),
- (inst $data, $addr, 0, 0, 0)
->;
-
-def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
-def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
-
-class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr, vt:$data)),
- (inst $addr, $data, 0, 0)
->;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
+ "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
-} // End Predicates = [isVI]
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 9550a3a..cd7540e 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -35,7 +35,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
-FunctionPass *createARMGlobalBaseRegPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index ef609a6..a44dc83 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -17,6 +17,17 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
+// ARM Helper classes.
+//
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+class Architecture<string fname, string aname, list<SubtargetFeature> features >
+ : SubtargetFeature<fname, "ARMArch", aname,
+ !strconcat(aname, " architecture"), features>;
+
+//===----------------------------------------------------------------------===//
// ARM Subtarget state.
//
@@ -51,8 +62,11 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8",
"true", "Enable ARMv8 FP",
[FeatureVFP4]>;
+def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true",
+ "Enable full half-precision floating point",
+ [FeatureFPARMv8]>;
def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true",
- "Restrict VFP3 to 16 double registers">;
+ "Restrict FP to 16 double registers">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
@@ -119,9 +133,9 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Has return address stack">;
-/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
-def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
- "Supports v7 DSP instructions in Thumb2">;
+/// DSP extension.
+def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true",
+ "Supports DSP instructions in ARM and/or Thumb2">;
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
@@ -150,11 +164,28 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
"NaCl trap">;
+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+ "StrictAlign", "true",
+ "Disallow all unaligned memory "
+ "access">;
+
def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
"Generate calls via indirect call "
"instructions">;
-// ARM ISAs.
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+ "Reserve R9, making it unavailable as "
+ "GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+ "Don't use movt/movw pairs for 32-bit "
+ "imms">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM ISAs.
+//
+
def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
"Support ARM v4T instructions">;
def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
@@ -180,302 +211,444 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
[HasV6T2Ops, FeaturePerfMon]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureVirtualization,
- FeatureMP]>;
+ [HasV7Ops]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
- [HasV8Ops, FeatureAClass, FeatureCRC]>;
+ [HasV8Ops]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions",
+ [HasV8_1aOps]>;
+
//===----------------------------------------------------------------------===//
-// ARM Processors supported.
+// ARM Processor subtarget features.
//
-include "ARMSchedule.td"
-
-// ARM processor families.
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
- "Cortex-A5 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureTrustZone, FeatureMP]>;
+ "Cortex-A5 ARM processors", []>;
def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7",
- "Cortex-A7 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureVFP4, FeatureMP,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureVirtualization]>;
+ "Cortex-A7 ARM processors", []>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
- "Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding, FeatureT2XtPk,
- FeatureTrustZone]>;
+ "Cortex-A8 ARM processors", []>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
- "Cortex-A9 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone]>;
-def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
- "Swift ARM processors",
- [FeatureNEONForFP, FeatureT2XtPk,
- FeatureVFP4, FeatureMP, FeatureHWDiv,
- FeatureHWDivARM, FeatureAvoidPartialCPSR,
- FeatureAvoidMOVsShOp,
- FeatureHasSlowFPVMLx, FeatureTrustZone]>;
+ "Cortex-A9 ARM processors", []>;
def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12",
- "Cortex-A12 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureVFP4,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureVirtualization,
- FeatureTrustZone]>;
-
-
-// FIXME: It has not been determined if A15 has these features.
-def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
- "Cortex-A15 ARM processors",
- [FeatureT2XtPk, FeatureVFP4,
- FeatureMP, FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone, FeatureVirtualization]>;
-
+ "Cortex-A12 ARM processors", []>;
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+ "Cortex-A15 ARM processors", []>;
def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
- "Cortex-A17 ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureVFP4,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureAvoidPartialCPSR,
- FeatureVirtualization,
- FeatureTrustZone]>;
-
+ "Cortex-A17 ARM processors", []>;
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors", []>;
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
- "Cortex-A53 ARM processors",
- [FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto, FeatureCRC]>;
-
+ "Cortex-A53 ARM processors", []>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
- "Cortex-A57 ARM processors",
- [FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto, FeatureCRC]>;
+ "Cortex-A57 ARM processors", []>;
+def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+ "Cortex-A72 ARM processors", []>;
-def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
- "Cortex-R4 ARM processors",
- [FeatureHWDiv,
- FeatureAvoidPartialCPSR,
- FeatureDSPThumb2, FeatureT2XtPk,
- HasV7Ops, FeatureDB, FeatureHasRAS,
- FeatureRClass]>;
+def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
+ "Qualcomm ARM processors", []>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+ "Swift ARM processors", []>;
+
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+ "Cortex-R4 ARM processors", []>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
- "Cortex-R5 ARM processors",
- [FeatureSlowFPBrcc,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
-
-// FIXME: krait has currently the same features as A9
-// plus VFP4 and hardware division features.
-def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
- "Qualcomm ARM processors",
- [FeatureVMLxForwarding,
- FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR,
- FeatureTrustZone,
- FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM]>;
+ "Cortex-R5 ARM processors", []>;
+def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
+ "Cortex-R7 ARM processors", []>;
-class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+//===----------------------------------------------------------------------===//
+// ARM schedules.
+//
+
+include "ARMSchedule.td"
+
+
+//===----------------------------------------------------------------------===//
+// ARM architectures
+//
+
+def ARMv2 : Architecture<"armv2", "ARMv2", []>;
+
+def ARMv2a : Architecture<"armv2a", "ARMv2a", []>;
+
+def ARMv3 : Architecture<"armv3", "ARMv3", []>;
+
+def ARMv3m : Architecture<"armv3m", "ARMv3m", []>;
+
+def ARMv4 : Architecture<"armv4", "ARMv4", []>;
+
+def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>;
+
+def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>;
+
+def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>;
+
+def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
+
+def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
+
+def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
+ FeatureDSP]>;
+
+def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
+
+def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps,
+ FeatureTrustZone]>;
+
+def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureMClass]>;
+
+def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
+ FeatureNEON,
+ FeatureDB,
+ FeatureDSP,
+ FeatureAClass]>;
+
+def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
+ FeatureDB,
+ FeatureDSP,
+ FeatureHWDiv,
+ FeatureRClass]>;
+
+def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass]>;
+
+def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
+ FeatureThumb2,
+ FeatureNoARM,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureMClass,
+ FeatureDSP,
+ FeatureT2XtPk]>;
+
+def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+// Aliases
+def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>;
+def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>;
+def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>;
+def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>;
+def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>;
+def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>;
+
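
Editor's note on the Architecture records above: each one is itself a SubtargetFeature whose implied-features list pulls in everything the architecture guarantees, so the per-CPU definitions that follow only add what the core layers on top; the subtarget-feature machinery resolves the implications transitively when a CPU's feature bits are computed. A standalone sketch of that closure over a toy feature graph; the names mirror a few of the records above, but the graph itself is illustrative.

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using FeatureGraph = std::map<std::string, std::vector<std::string>>;

// Collect a feature and everything it transitively implies, the way the
// SubtargetFeature "implies" lists are resolved.
static void addWithImplied(const FeatureGraph &G, const std::string &F,
                           std::set<std::string> &Out) {
  if (!Out.insert(F).second)
    return;
  auto It = G.find(F);
  if (It == G.end())
    return;
  for (const std::string &Dep : It->second)
    addWithImplied(G, Dep, Out);
}

int main() {
  FeatureGraph G = {
      {"ARMv8a", {"HasV8Ops", "FeatureNEON", "FeatureDB", "FeatureDSP"}},
      {"HasV8Ops", {"HasV7Ops"}},
      {"HasV7Ops", {"HasV6T2Ops"}},
      {"FeatureNEON", {"FeatureVFP3"}},
  };

  // A CPU = its architecture plus its own extras (cf. the cortex-a53 record).
  std::vector<std::string> CPUFeatures = {"ARMv8a", "FeatureCrypto",
                                          "FeatureCRC"};
  std::set<std::string> Resolved;
  for (const std::string &F : CPUFeatures)
    addWithImplied(G, F, Resolved);

  for (const std::string &F : Resolved)
    printf("%s\n", F.c_str());
  return 0;
}
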
+
+//===----------------------------------------------------------------------===//
+// ARM processors
+//
+
+// Dummy CPU, used to target architectures
+def : ProcNoItin<"generic", []>;
+
+def : ProcNoItin<"arm8", [ARMv4]>;
+def : ProcNoItin<"arm810", [ARMv4]>;
+def : ProcNoItin<"strongarm", [ARMv4]>;
+def : ProcNoItin<"strongarm110", [ARMv4]>;
+def : ProcNoItin<"strongarm1100", [ARMv4]>;
+def : ProcNoItin<"strongarm1110", [ARMv4]>;
+
+def : ProcNoItin<"arm7tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>;
+def : ProcNoItin<"arm710t", [ARMv4t]>;
+def : ProcNoItin<"arm720t", [ARMv4t]>;
+def : ProcNoItin<"arm9", [ARMv4t]>;
+def : ProcNoItin<"arm9tdmi", [ARMv4t]>;
+def : ProcNoItin<"arm920", [ARMv4t]>;
+def : ProcNoItin<"arm920t", [ARMv4t]>;
+def : ProcNoItin<"arm922t", [ARMv4t]>;
+def : ProcNoItin<"arm940t", [ARMv4t]>;
+def : ProcNoItin<"ep9312", [ARMv4t]>;
+
+def : ProcNoItin<"arm10tdmi", [ARMv5t]>;
+def : ProcNoItin<"arm1020t", [ARMv5t]>;
+
+def : ProcNoItin<"arm9e", [ARMv5te]>;
+def : ProcNoItin<"arm926ej-s", [ARMv5te]>;
+def : ProcNoItin<"arm946e-s", [ARMv5te]>;
+def : ProcNoItin<"arm966e-s", [ARMv5te]>;
+def : ProcNoItin<"arm968e-s", [ARMv5te]>;
+def : ProcNoItin<"arm10e", [ARMv5te]>;
+def : ProcNoItin<"arm1020e", [ARMv5te]>;
+def : ProcNoItin<"arm1022e", [ARMv5te]>;
+def : ProcNoItin<"xscale", [ARMv5te]>;
+def : ProcNoItin<"iwmmxt", [ARMv5te]>;
+
+def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>;
+def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>;
+
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2,
+ FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
-// V4 Processors.
-def : ProcNoItin<"generic", []>;
-def : ProcNoItin<"arm8", []>;
-def : ProcNoItin<"arm810", []>;
-def : ProcNoItin<"strongarm", []>;
-def : ProcNoItin<"strongarm110", []>;
-def : ProcNoItin<"strongarm1100", []>;
-def : ProcNoItin<"strongarm1110", []>;
-
-// V4T Processors.
-def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
-def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
-def : ProcNoItin<"arm710t", [HasV4TOps]>;
-def : ProcNoItin<"arm720t", [HasV4TOps]>;
-def : ProcNoItin<"arm9", [HasV4TOps]>;
-def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
-def : ProcNoItin<"arm920", [HasV4TOps]>;
-def : ProcNoItin<"arm920t", [HasV4TOps]>;
-def : ProcNoItin<"arm922t", [HasV4TOps]>;
-def : ProcNoItin<"arm940t", [HasV4TOps]>;
-def : ProcNoItin<"ep9312", [HasV4TOps]>;
-
-// V5T Processors.
-def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
-def : ProcNoItin<"arm1020t", [HasV5TOps]>;
-
-// V5TE Processors.
-def : ProcNoItin<"arm9e", [HasV5TEOps]>;
-def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
-def : ProcNoItin<"arm10e", [HasV5TEOps]>;
-def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
-def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
-def : ProcNoItin<"xscale", [HasV5TEOps]>;
-def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
-
-// V6 Processors.
-def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
-def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-
-// V6M Processors.
-def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM,
- FeatureDB, FeatureMClass]>;
-
-// V6K Processors.
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>;
-def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2,
- FeatureHasSlowFPVMLx]>;
-
-// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops,
- FeatureDSPThumb2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
- FeatureHasSlowFPVMLx,
- FeatureDSPThumb2]>;
-
-// V7a Processors.
// FIXME: A5 has currently the same Schedule model as A8
-def : ProcessorModel<"cortex-a5", CortexA8Model,
- [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureVFP4, FeatureDSPThumb2,
- FeatureHasRAS, FeatureAClass]>;
-def : ProcessorModel<"cortex-a7", CortexA8Model,
- [ProcA7, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
-def : ProcessorModel<"cortex-a8", CortexA8Model,
- [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
-def : ProcessorModel<"cortex-a9", CortexA9Model,
- [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS, FeatureMP,
- FeatureAClass]>;
+def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4]>;
+
+def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureMP,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureVirtualization]>;
+
+def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
+
+def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureMP]>;
// FIXME: A12 has currently the same Schedule model as A9
-def : ProcessorModel<"cortex-a12", CortexA9Model,
- [ProcA12, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
-
-// FIXME: A15 has currently the same ProcessorModel as A9.
-def : ProcessorModel<"cortex-a15", CortexA9Model,
- [ProcA15, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
+def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization,
+ FeatureMP]>;
+
+// FIXME: A15 has currently the same Schedule model as A9.
+def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
// FIXME: A17 has currently the same Schedule model as A9
-def : ProcessorModel<"cortex-a17", CortexA9Model,
- [ProcA17, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP,
- FeatureHasRAS, FeatureAClass]>;
+def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
+ FeatureHasRAS,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureVirtualization]>;
// FIXME: krait has currently the same Schedule model as A9
-def : ProcessorModel<"krait", CortexA9Model,
- [ProcKrait, HasV7Ops,
- FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureHasRAS,
- FeatureAClass]>;
+// FIXME: krait has currently the same features as A9 plus VFP4 and hardware
+// division features.
+def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
+ FeatureHasRAS,
+ FeatureVMLxForwarding,
+ FeatureT2XtPk,
+ FeatureFP16,
+ FeatureAvoidPartialCPSR,
+ FeatureVFP4,
+ FeatureHWDiv,
+ FeatureHWDivARM]>;
+
+def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx]>;
// FIXME: R4 has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r4", CortexA8Model,
- [ProcR4]>;
+def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R4F has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r4f", CortexA8Model,
- [ProcR4,
- FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
- FeatureVFP3, FeatureD16]>;
+def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
+ FeatureHasRAS,
+ FeatureSlowFPBrcc,
+ FeatureHasSlowFPVMLx,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
-def : ProcessorModel<"cortex-r5", CortexA8Model,
- [ProcR5, HasV7Ops, FeatureDB,
- FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS,
- FeatureD16, FeatureRClass]>;
+def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureD16,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
-def : ProcessorModel<"cortex-r7", CortexA8Model,
- [ProcR5, HasV7Ops, FeatureDB,
- FeatureVFP3, FeatureDSPThumb2,
- FeatureHasRAS, FeatureVFPOnlySP,
- FeatureD16, FeatureMP, FeatureRClass]>;
-
-// V7M Processors.
-def : ProcNoItin<"cortex-m3", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureMClass]>;
-def : ProcNoItin<"sc300", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureMClass]>;
-
-// V7EM Processors.
-def : ProcNoItin<"cortex-m4", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureVFP4,
- FeatureVFPOnlySP, FeatureD16,
- FeatureMClass]>;
-def : ProcNoItin<"cortex-m7", [HasV7Ops,
- FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv, FeatureDSPThumb2,
- FeatureT2XtPk, FeatureFPARMv8,
- FeatureD16, FeatureMClass]>;
-
-
-// Swift uArch Processors.
-def : ProcessorModel<"swift", SwiftModel,
- [ProcSwift, HasV7Ops, FeatureNEON,
- FeatureDB, FeatureDSPThumb2,
- FeatureHasRAS, FeatureAClass]>;
-
-// V8 Processors
-def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
-def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
-// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
-def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2]>;
+def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
+ FeatureHasRAS,
+ FeatureVFP3,
+ FeatureVFPOnlySP,
+ FeatureD16,
+ FeatureFP16,
+ FeatureMP,
+ FeatureSlowFPBrcc,
+ FeatureHWDivARM,
+ FeatureHasSlowFPVMLx,
+ FeatureAvoidPartialCPSR,
+ FeatureT2XtPk]>;
+
+def : ProcNoItin<"cortex-m3", [ARMv7m]>;
+def : ProcNoItin<"sc300", [ARMv7m]>;
+
+def : ProcNoItin<"cortex-m4", [ARMv7em,
+ FeatureVFP4,
+ FeatureVFPOnlySP,
+ FeatureD16]>;
+
+def : ProcNoItin<"cortex-m7", [ARMv7em,
+ FeatureFPARMv8,
+ FeatureD16]>;
+
+
+def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureCrypto,
+ FeatureCRC]>;
// Cyclone is very similar to swift
-def : ProcessorModel<"cyclone", SwiftModel,
- [ProcSwift, HasV8Ops, HasV7Ops,
- FeatureCrypto, FeatureFPARMv8,
- FeatureDB,FeatureDSPThumb2,
- FeatureHasRAS, FeatureZCZeroing]>;
+def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
+ FeatureHasRAS,
+ FeatureNEONForFP,
+ FeatureT2XtPk,
+ FeatureVFP4,
+ FeatureMP,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureAvoidPartialCPSR,
+ FeatureAvoidMOVsShOp,
+ FeatureHasSlowFPVMLx,
+ FeatureCrypto,
+ FeatureZCZeroing]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
@@ -504,8 +677,15 @@ def ARMAsmWriter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def ARMAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "ARM";
+ string BreakCharacters = ".";
+}
+
def ARM : Target {
// Pull in Instruction Info:
let InstructionSet = ARMInstrInfo;
let AssemblyWriters = [ARMAsmWriter];
+ let AssemblyParserVariants = [ARMAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 738dded..206db96 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -60,7 +60,7 @@ using namespace llvm;
ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr),
- InConstantPool(false) {}
+ InConstantPool(false), OptimizationGoals(-1) {}
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
@@ -80,8 +80,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer->EmitLabel(CurrentFnSym);
}
-void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType());
+void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
+ uint64_t Size = getDataLayout().getTypeAllocSize(CV->getType());
assert(Size && "C++ constructor pointer had zero size!");
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
@@ -106,9 +106,38 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<ARMSubtarget>();
SetupMachineFunction(MF);
+ const Function* F = MF.getFunction();
+ const TargetMachine& TM = MF.getTarget();
+
+ // Calculate this function's optimization goal.
+ unsigned OptimizationGoal;
+ if (F->hasFnAttribute(Attribute::OptimizeNone))
+ // For best debugging illusion, speed and small size sacrificed
+ OptimizationGoal = 6;
+ else if (F->optForMinSize())
+ // Aggressively for small size, speed and debug illusion sacrificed
+ OptimizationGoal = 4;
+ else if (F->optForSize())
+ // For small size, but speed and debugging illusion preserved
+ OptimizationGoal = 3;
+ else if (TM.getOptLevel() == CodeGenOpt::Aggressive)
+ // Aggressively for speed, small size and debug illusion sacrificed
+ OptimizationGoal = 2;
+ else if (TM.getOptLevel() > CodeGenOpt::None)
+ // For speed, but small size and good debug illusion preserved
+ OptimizationGoal = 1;
+ else // TM.getOptLevel() == CodeGenOpt::None
+ // For good debugging, but speed and small size preserved
+ OptimizationGoal = 5;
+
+ // Combine a new optimization goal with existing ones.
+ if (OptimizationGoals == -1) // uninitialized goals
+ OptimizationGoals = OptimizationGoal;
+ else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals
+ OptimizationGoals = 0;
if (Subtarget->isTargetCOFF()) {
- bool Internal = MF.getFunction()->hasInternalLinkage();
+ bool Internal = F->hasInternalLinkage();
COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
: COFF::IMAGE_SYM_CLASS_EXTERNAL;
int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
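
Editor's note on the OptimizationGoal computation above: the numbering follows Tag_ABI_optimization_goals from the ARM build-attributes ABI (1/2 for speed, 3/4 for size, 5/6 for debugging), and the module-wide value collapses to 0 as soon as two functions disagree, which suppresses the attribute. A standalone restatement of the selection and combine steps; CodeGenOpt levels are modelled as plain ints here.

#include <cstdio>

// Mirror of the per-function goal selection; optLevel stands in for
// CodeGenOpt::Level (0 = None, 3 = Aggressive).
static unsigned functionGoal(bool optNone, bool minSize, bool optSize,
                             int optLevel) {
  if (optNone)       return 6; // best debug illusion
  if (minSize)       return 4; // aggressively for size
  if (optSize)       return 3; // for size
  if (optLevel == 3) return 2; // aggressively for speed
  if (optLevel > 0)  return 1; // for speed
  return 5;                    // -O0: for good debugging
}

// Module-wide combine: -1 = uninitialized, 0 = conflicting goals.
static int combineGoals(int current, unsigned next) {
  if (current == -1)
    return int(next);
  return current == int(next) ? current : 0;
}

int main() {
  int goals = -1;
  goals = combineGoals(goals, functionGoal(false, false, false, 2)); // 1
  goals = combineGoals(goals, functionGoal(false, false, true, 2));  // conflict
  printf("%d\n", goals); // 0 -> the attribute is not emitted
  return 0;
}
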
@@ -198,22 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
MCSymbol *ARMAsmPrinter::
GetARMJTIPICJumpTableLabel(unsigned uid) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
SmallString<60> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid;
return OutContext.getOrCreateSymbol(Name);
}
-
-MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
- const DataLayout *DL = TM.getDataLayout();
- SmallString<60> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
- << getFunctionNumber();
- return OutContext.getOrCreateSymbol(Name);
-}
-
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
@@ -515,6 +535,17 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+
+ // The last attribute to be emitted is ABI_optimization_goals
+ MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+
+ if (OptimizationGoals > 0 &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI()))
+ ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals);
+ OptimizationGoals = -1;
+
+ ATS.finishAttributeSection();
}
//===----------------------------------------------------------------------===//
@@ -532,7 +563,7 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
if (Subtarget->hasV8Ops())
return ARMBuildAttrs::v8;
else if (Subtarget->hasV7Ops()) {
- if (Subtarget->isMClass() && Subtarget->hasThumb2DSP())
+ if (Subtarget->isMClass() && Subtarget->hasDSP())
return ARMBuildAttrs::v7E_M;
return ARMBuildAttrs::v7;
} else if (Subtarget->hasV6T2Ops())
@@ -587,7 +618,7 @@ void ARMAsmPrinter::emitAttributes() {
// We consider krait as a "cortex-a9" + hwdiv CPU
// Enable hwdiv through ".arch_extension idiv"
if (STI.hasDivide() || STI.hasDivideInARMMode())
- ATS.emitArchExtension(ARM::AEK_HWDIV);
+ ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM);
} else
ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString);
}
@@ -807,8 +838,6 @@ void ARMAsmPrinter::emitAttributes() {
else if (STI.hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowVirtualization);
-
- ATS.finishAttributeSection();
}
//===----------------------------------------------------------------------===//
@@ -828,8 +857,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF;
case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF;
- case ARMCP::GOT: return MCSymbolRefExpr::VK_GOT;
- case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_GOTOFF;
+ case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL;
}
llvm_unreachable("Invalid ARMCPModifier!");
}
@@ -875,8 +903,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
void ARMAsmPrinter::
EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
- const DataLayout *DL = TM.getDataLayout();
- int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType());
+ const DataLayout &DL = getDataLayout();
+ int Size = DL.getTypeAllocSize(MCPV->getType());
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
@@ -909,10 +937,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
OutContext);
if (ACPV->getPCAdjustment()) {
- MCSymbol *PCLabel = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- ACPV->getLabelId(),
- OutContext);
+ MCSymbol *PCLabel =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ ACPV->getLabelId(), OutContext);
const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext);
PCRelExpr =
MCBinaryExpr::createAdd(PCRelExpr,
@@ -1136,6 +1163,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
Offset = 0;
break;
case ARM::ADDri:
+ case ARM::t2ADDri:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
@@ -1198,7 +1226,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
#include "ARMGenMCPseudoLowering.inc"
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
@@ -1355,9 +1383,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
- MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- MI->getOperand(2).getImm(), OutContext);
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(2).getImm(), OutContext);
const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
@@ -1388,9 +1416,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
- MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
- getFunctionNumber(),
- MI->getOperand(3).getImm(), OutContext);
+ MCSymbol *LabelSym =
+ getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(3).getImm(), OutContext);
const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
const MCExpr *PCRelExpr =
@@ -1414,10 +1442,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the add.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
@@ -1436,10 +1463,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// This adds the address of LPC0 to r0.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the add.
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr)
@@ -1468,10 +1494,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// a PC-relative address at the ldr instruction.
// Emit the label.
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(),
+ OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(),
getFunctionNumber(),
- MI->getOperand(2).getImm(),
- OutContext));
+ MI->getOperand(2).getImm(), OutContext));
// Form and emit the load
unsigned Opcode;
@@ -1519,7 +1544,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MCPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
- EmitGlobalConstant(MCPE.Val.ConstVal);
+ EmitGlobalConstant(DL, MCPE.Val.ConstVal);
return;
}
case ARM::JUMPTABLE_ADDRS:
@@ -1653,12 +1678,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// adds $val, #7
// str $val, [$src, #4]
// movs r0, #0
- // b 1f
+ // b LSJLJEH
// movs r0, #1
- // 1:
+ // LSJLJEH:
unsigned SrcReg = MI->getOperand(0).getReg();
unsigned ValReg = MI->getOperand(1).getReg();
- MCSymbol *Label = GetARMSJLJEHLabel();
+ MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true);
OutStreamer->AddComment("eh_setjmp begin");
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
.addReg(ValReg)
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 3d25121..ed7be2d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -51,6 +51,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// labels used for ARMv4t thumb code to make register indirect calls.
SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads;
+ /// OptimizationGoals - Maintain a combined optimization goal for all
+ /// functions in a module: one of Tag_ABI_optimization_goals values,
+ /// -1 if uninitialized, 0 if conflicting goals
+ int OptimizationGoals;
+
public:
explicit ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
@@ -84,7 +89,7 @@ public:
void EmitFunctionEntryLabel() override;
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
- void EmitXXStructor(const Constant *CV) override;
+ void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -119,8 +124,6 @@ private:
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCSymbol *GetARMJTIPICJumpTableLabel(unsigned uid) const;
- MCSymbol *GetARMSJLJEHLabel() const;
-
MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags);
public:
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9f43e73..49f3288 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -97,7 +97,7 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
Subtarget(STI) {
for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
- assert(false && "Duplicated entries?");
+ llvm_unreachable("Duplicated entries?");
MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
}
@@ -440,7 +440,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
if (MI->isBundle()) {
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
int PIdx = I->findFirstPredOperandIdx();
@@ -518,7 +518,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
static bool isCPSRDefined(const MachineInstr *MI) {
for (const auto &MO : MI->operands())
- if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef())
+ if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}
@@ -647,7 +647,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
unsigned Size = 0;
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
@@ -853,11 +853,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), Align);
switch (RC->getSize()) {
case 4:
@@ -1043,12 +1041,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
switch (RC->getSize()) {
case 4:
@@ -1224,6 +1219,60 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
+/// \brief Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
+/// depending on whether the result is used.
+void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
+ bool isThumb1 = Subtarget.isThumb1Only();
+ bool isThumb2 = Subtarget.isThumb2();
+ const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
+
+ MachineInstr *MI = MBBI;
+ DebugLoc dl = MI->getDebugLoc();
+ MachineBasicBlock *BB = MI->getParent();
+
+ MachineInstrBuilder LDM, STM;
+ if (isThumb1 || !MI->getOperand(1).isDead()) {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
+ : isThumb1 ? ARM::tLDMIA_UPD
+ : ARM::LDMIA_UPD))
+ .addOperand(MI->getOperand(1));
+ } else {
+ LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
+ }
+
+ if (isThumb1 || !MI->getOperand(0).isDead()) {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
+ : isThumb1 ? ARM::tSTMIA_UPD
+ : ARM::STMIA_UPD))
+ .addOperand(MI->getOperand(0));
+ } else {
+ STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
+ }
+
+ AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
+ AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+
+ // Sort the scratch registers into ascending order.
+ const TargetRegisterInfo &TRI = getRegisterInfo();
+ llvm::SmallVector<unsigned, 6> ScratchRegs;
+ for(unsigned I = 5; I < MI->getNumOperands(); ++I)
+ ScratchRegs.push_back(MI->getOperand(I).getReg());
+ std::sort(ScratchRegs.begin(), ScratchRegs.end(),
+ [&TRI](const unsigned &Reg1,
+ const unsigned &Reg2) -> bool {
+ return TRI.getEncodingValue(Reg1) <
+ TRI.getEncodingValue(Reg2);
+ });
+
+ for (const auto &Reg : ScratchRegs) {
+ LDM.addReg(Reg, RegState::Define);
+ STM.addReg(Reg, RegState::Kill);
+ }
+
+ BB->erase(MBBI);
+}
+
+
bool
ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
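
Editor's note on expandMEMCPY above: besides choosing the updating vs. non-updating LDM/STM form based on whether the base registers are dead, the only subtle step is that LDM/STM register lists are conventionally given in ascending order of hardware encoding (that is the order the transfers happen in), hence the std::sort over the scratch operands before they are appended as defs/kills. A standalone illustration of that sort with a stand-in for TRI.getEncodingValue; the register names and encodings below are assumed for the example.

#include <algorithm>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Stand-in for TargetRegisterInfo::getEncodingValue.
  std::map<std::string, unsigned> Encoding = {
      {"r4", 4}, {"r5", 5}, {"r6", 6}, {"r12", 12}};

  // Scratch registers as the register allocator might hand them to us.
  std::vector<std::string> ScratchRegs = {"r6", "r12", "r4", "r5"};

  // LDM/STM register lists are listed in ascending encoding order, so sort.
  std::sort(ScratchRegs.begin(), ScratchRegs.end(),
            [&Encoding](const std::string &A, const std::string &B) {
              return Encoding.at(A) < Encoding.at(B);
            });

  for (const std::string &R : ScratchRegs)
    printf("%s ", R.c_str());
  printf("\n"); // r4 r5 r6 r12
  return 0;
}
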
@@ -1237,6 +1286,11 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return true;
}
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ expandMEMCPY(MI);
+ return true;
+ }
+
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
@@ -1325,9 +1379,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
// instructions, so that's probably OK, but is PIC always correct when
// we get here?
if (ACPV->isGlobalValue())
- NewCPV = ARMConstantPoolConstant::
- Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
- ARMCP::CPValue, 4);
+ NewCPV = ARMConstantPoolConstant::Create(
+ cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
+ 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
else if (ACPV->isExtSymbol())
NewCPV = ARMConstantPoolSymbol::
Create(MF.getFunction()->getContext(),
@@ -1645,16 +1699,14 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
if (!NumCycles)
return false;
// If we are optimizing for size, see if the branch in the predecessor can be
// lowered to cbn?z by the constant island lowering pass, and return false if
// so. This results in a shorter instruction sequence.
- const Function *F = MBB.getParent()->getFunction();
- if (F->hasFnAttribute(Attribute::OptimizeForSize) ||
- F->hasFnAttribute(Attribute::MinSize)) {
+ if (MBB.getParent()->getFunction()->optForSize()) {
MachineBasicBlock *Pred = *MBB.pred_begin();
if (!Pred->empty()) {
MachineInstr *LastMI = &*Pred->rbegin();
@@ -1677,12 +1729,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
// Attempt to estimate the relative costs of predication versus branching.
- unsigned UnpredCost = Probability.getNumerator() * NumCycles;
- UnpredCost /= Probability.getDenominator();
- UnpredCost += 1; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() / 10;
-
- return (NumCycles + ExtraPredCycles) <= UnpredCost;
+  // Here we scale up each component of UnpredCost to avoid a precision issue
+  // when scaling NumCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
+ UnpredCost += ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
+
+ return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
}
bool ARMBaseInstrInfo::
@@ -1690,23 +1744,22 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned TCycles, unsigned TExtra,
MachineBasicBlock &FMBB,
unsigned FCycles, unsigned FExtra,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
if (!TCycles || !FCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
- unsigned TUnpredCost = Probability.getNumerator() * TCycles;
- TUnpredCost /= Probability.getDenominator();
-
- uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
- unsigned FUnpredCost = Comp * FCycles;
- FUnpredCost /= Probability.getDenominator();
-
+ // Here we scale up each component of UnpredCost to avoid precision issue when
+ // scaling TCycles/FCycles by Probability.
+ const unsigned ScalingUpFactor = 1024;
+ unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
+ unsigned FUnpredCost =
+ Probability.getCompl().scale(FCycles * ScalingUpFactor);
unsigned UnpredCost = TUnpredCost + FUnpredCost;
- UnpredCost += 1; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+ UnpredCost += 1 * ScalingUpFactor; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
- return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
+ return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost;
}
bool
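Both isProfitableToIfCvt overloads above now avoid integer-division precision loss by scaling every term of the cost comparison with a common ScalingUpFactor and letting BranchProbability::scale() do the multiply-then-divide. A standalone sketch of the same fixed-point trick, with an assumed branch probability of 1/3 and an assumed misprediction penalty of 10 cycles:

  #include <cstdint>
  #include <cstdio>

  // Scale a cycle count by Num/Denom without losing precision; this mirrors
  // Probability.scale(NumCycles * ScalingUpFactor) in the hunk above.
  static unsigned scaleByProbability(unsigned Cycles, uint64_t Num,
                                     uint64_t Denom) {
    return static_cast<unsigned>(Cycles * Num / Denom);
  }

  int main() {
    const unsigned ScalingUpFactor = 1024;
    unsigned NumCycles = 3, ExtraPredCycles = 1;
    uint64_t Num = 1, Denom = 3;     // assumed probability of taking the branch
    unsigned MispredictPenalty = 10; // assumed subtarget penalty

    unsigned UnpredCost =
        scaleByProbability(NumCycles * ScalingUpFactor, Num, Denom);
    UnpredCost += ScalingUpFactor; // the branch itself
    UnpredCost += MispredictPenalty * ScalingUpFactor / 10;

    bool Profitable =
        (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
    std::printf("predication profitable: %s\n", Profitable ? "yes" : "no");
  }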
@@ -1744,9 +1797,10 @@ unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
llvm_unreachable("Unknown unconditional branch opcode!");
}
-/// commuteInstruction - Handle commutable instructions.
-MachineInstr *
-ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
switch (MI->getOpcode()) {
case ARM::MOVCCr:
case ARM::t2MOVCCr: {
@@ -1756,7 +1810,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return nullptr;
- MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
+ MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
if (!MI)
return nullptr;
// After swapping the MOVCC operands, also invert the condition.
@@ -1765,7 +1819,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
return MI;
}
}
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
/// Identify instructions that can be folded into a MOVCC instruction, and
@@ -1975,21 +2029,12 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
}
}
-static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI,
- MachineInstr *MI) {
- for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true);
- Subreg.isValid(); ++Subreg)
- if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) !=
- MachineBasicBlock::LQR_Dead)
- return true;
- return false;
-}
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
MachineFunction &MF, MachineInstr *MI,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize))
+ if (!MF.getFunction()->optForMinSize())
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -2058,11 +2103,9 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// registers live within the function we might clobber a return value
// register; the other way a register can be live here is if it's
// callee-saved.
- // TODO: Currently, computeRegisterLiveness() does not report "live" if a
- // sub reg is live. When computeRegisterLiveness() works for sub reg, it
- // can replace isAnySubRegLive().
if (isCalleeSavedRegister(CurReg, CSRegs) ||
- isAnySubRegLive(CurReg, TRI, MI)) {
+ MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
+ MachineBasicBlock::LQR_Dead) {
// VFP pops don't allow holes in the register list, so any skip is fatal
// for our transformation. GPR pops do, so we should just keep looking.
if (IsVFPPushPop)
@@ -3381,7 +3424,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
assert(Idx != -1 && "Cannot find bundled definition!");
DefIdx = Idx;
- return II;
+ return &*II;
}
static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
@@ -3389,7 +3432,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
unsigned &UseIdx, unsigned &Dist) {
Dist = 0;
- MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ MachineBasicBlock::const_instr_iterator II = ++MI->getIterator();
assert(II->isInsideBundle() && "Empty bundle?");
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -3410,7 +3453,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
}
UseIdx = Idx;
- return II;
+ return &*II;
}
/// Return the number of cycles to add to (or subtract from) the static
@@ -3652,6 +3695,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// instructions).
if (Latency > 0 && Subtarget.isThumb2()) {
const MachineFunction *MF = DefMI->getParent()->getParent();
+ // FIXME: Use Function::optForSize().
if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
--Latency;
}
@@ -3931,11 +3975,11 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
// other passes may query the latency of a bundled instruction.
if (MI->isBundle()) {
unsigned Latency = 0;
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
if (I->getOpcode() != ARM::t2IT)
- Latency += getInstrLatency(ItinData, I, PredCost);
+ Latency += getInstrLatency(ItinData, &*I, PredCost);
}
return Latency;
}
@@ -4054,8 +4098,8 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
- MachineMemOperand *MMO = MBB.getParent()->
- getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
MIB.addMemOperand(MMO);
AddDefaultPred(MIB);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index b4706e3..d80c494 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -86,6 +86,18 @@ protected:
RegSubRegPair &BaseReg,
RegSubRegPairAndIdx &InsertedReg) const override;
+ /// Commutes the operands in the given instruction.
+ /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
+ ///
+  /// Do not call this method for a non-commutable instruction or for a
+  /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
+  /// Even though the instruction is commutable, the method may still
+  /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
@@ -188,9 +200,6 @@ public:
MachineInstr *duplicate(MachineInstr *Orig,
MachineFunction &MF) const override;
- MachineInstr *commuteInstruction(MachineInstr*,
- bool=false) const override;
-
const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
unsigned SubIdx, unsigned State,
const TargetRegisterInfo *TRI) const;
@@ -224,15 +233,15 @@ public:
bool isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
unsigned ExtraT, MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- const BranchProbability &Probability) const override {
+ BranchProbability Probability) const override {
return NumCycles == 1;
}
@@ -343,6 +352,8 @@ private:
virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI,
Reloc::Model RM) const = 0;
+ void expandMEMCPY(MachineBasicBlock::iterator) const;
+
private:
/// Modeling special VFP / NEON fp MLA / MLS hazards.
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e7d5be77..419717c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -225,7 +225,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const {
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
@@ -338,7 +339,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ if (!TargetRegisterInfo::canRealignStack(MF))
return false;
if (AFI->isThumb1OnlyFunction())
return false;
@@ -356,18 +357,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::
-needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
-}
-
-bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index fdc1ef9..cea8b80 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -94,7 +94,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
- const uint32_t *getNoPreservedMask() const;
+ const uint32_t *getNoPreservedMask() const override;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
@@ -126,15 +126,15 @@ public:
ArrayRef<MCPhysReg> Order,
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF,
- const VirtRegMap *VRM) const override;
+ const VirtRegMap *VRM,
+ const LiveRegMatrix *Matrix) const override;
void updateRegAllocHint(unsigned Reg, unsigned NewReg,
MachineFunction &MF) const override;
bool hasBasePointer(const MachineFunction &MF) const;
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
index d687568..a731d00 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.h
@@ -160,15 +160,15 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
-static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
- ARM::S4, ARM::S5, ARM::S6, ARM::S7,
- ARM::S8, ARM::S9, ARM::S10, ARM::S11,
- ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
-static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
-static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
@@ -199,9 +199,11 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
- unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
+ auto &DL = State.getMachineFunction().getDataLayout();
+ unsigned StackAlign = DL.getStackAlignment();
+ unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign);
- ArrayRef<uint16_t> RegList;
+ ArrayRef<MCPhysReg> RegList;
switch (LocVT.SimpleTy) {
case MVT::i32: {
RegList = RRegList;
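The aggregate-allocation hunk above replaces the hard-coded 8-byte cap with the stack alignment reported by the DataLayout. A trivial standalone sketch of the clamp, with assumed alignment values:

  #include <algorithm>
  #include <cstdio>

  int main() {
    // Assumed inputs: the first member of the aggregate requests 16-byte
    // alignment; the cap now comes from the DataLayout rather than 8U.
    unsigned MemberAlign = 16;
    unsigned StackAlign = 8; // e.g. DL.getStackAlignment() on an AAPCS target

    unsigned Align = std::min(MemberAlign, StackAlign);
    std::printf("allocating the HA block with alignment %u\n", Align);
  }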
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index 27cf06b..2335164 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[
CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>,
CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>,
+ CCIfType<[v2f64], CCIfAlign<"16",
+ CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>,
CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>>
]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index f4ec8c6..e89757c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -342,7 +342,7 @@ void ARMConstantIslands::verify() {
#ifndef NDEBUG
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
+ MachineBasicBlock *MBB = &*MBBI;
unsigned MBBId = MBB->getNumber();
assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
@@ -542,7 +542,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs)
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -589,6 +589,8 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
for (MachineBasicBlock &MBB : *MF) {
auto MI = MBB.getLastNonDebugInstr();
+ if (MI == MBB.end())
+ continue;
unsigned JTOpcode;
switch (MI->getOpcode()) {
@@ -639,12 +641,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
/// into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
+ MachineFunction::iterator MBBI = MBB->getIterator();
// Can't fall off end of function.
if (std::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = std::next(MBBI);
+ MachineBasicBlock *NextBB = &*std::next(MBBI);
if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
return false;
@@ -722,15 +724,15 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(I);
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(&MBB);
// The known bits of the entry block offset are determined by the function
// alignment.
BBInfo.front().KnownBits = MF->getAlignment();
// Compute block offsets and known bits.
- adjustBBOffsetsAfter(MF->begin());
+ adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
@@ -968,7 +970,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++OrigBB->getIterator();
MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
@@ -1088,7 +1090,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
unsigned NextBlockOffset, NextBlockAlignment;
- MachineFunction::const_iterator NextBlock = Water;
+ MachineFunction::const_iterator NextBlock = Water->getIterator();
if (++NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
NextBlockAlignment = 0;
@@ -1350,7 +1352,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
<< format(", expected CPE offset %#x\n", CPEOffset));
- NewMBB = std::next(MachineFunction::iterator(UserMBB));
+ NewMBB = &*++UserMBB->getIterator();
// Add an unconditional branch from UserMBB to fallthrough block. Record
// it for branch lengthening; this new branch will not get out of range,
// but if the preceding conditional branch is out of range, the targets
@@ -1503,8 +1505,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
NewWaterList.insert(NewIsland);
// The new CPE goes before the following block (NewMBB).
- NewMBB = std::next(MachineFunction::iterator(WaterBB));
-
+ NewMBB = &*++WaterBB->getIterator();
} else {
// No water found.
DEBUG(dbgs() << "No water found\n");
@@ -1515,7 +1516,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
- MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
+ MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1532,7 +1533,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF->insert(NewMBB, NewIsland);
+ MF->insert(NewMBB->getIterator(), NewIsland);
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
@@ -1553,7 +1554,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
+ adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1732,7 +1733,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*++MBB->getIterator();
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
@@ -2058,9 +2059,9 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
/// \brief Returns whether CPEMI is the first instruction in the block
/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
/// we can switch the first register to PC and usually remove the address
-/// calculation that preceeded it.
+/// calculation that preceded it.
static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
- MachineFunction::iterator MBB = JTMI->getParent();
+ MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
MachineFunction *MF = MBB->getParent();
++MBB;
@@ -2235,7 +2236,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
SmallVector<MachineOperand, 4> CondPrior;
- MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator BBi = BB->getIterator();
MachineFunction::iterator OldPrior = std::prev(BBi);
// If the block terminator isn't analyzable, don't try to move the block
@@ -2258,7 +2259,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
- MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++JTBB->getIterator();
MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
@@ -2273,8 +2274,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Update the CFG.
NewBB->addSuccessor(BB);
- JTBB->removeSuccessor(BB);
- JTBB->addSuccessor(NewBB);
+ JTBB->replaceSuccessor(BB, NewBB);
++NumJTInserted;
return NewBB;
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index 7d41c69..c9849b2 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -52,8 +52,7 @@ const char *ARMConstantPoolValue::getModifierText() const {
// strings if that's legal.
case ARMCP::no_modifier: return "none";
case ARMCP::TLSGD: return "tlsgd";
- case ARMCP::GOT: return "GOT";
- case ARMCP::GOTOFF: return "GOTOFF";
+ case ARMCP::GOT_PREL: return "GOT_PREL";
case ARMCP::GOTTPOFF: return "gottpoff";
case ARMCP::TPOFF: return "tpoff";
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index 36f63e2..6b18a4e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -39,8 +39,7 @@ namespace ARMCP {
enum ARMCPModifier {
no_modifier,
TLSGD,
- GOT,
- GOTOFF,
+ GOT_PREL,
GOTTPOFF,
TPOFF
};
@@ -103,8 +102,6 @@ public:
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
- unsigned getRelocationInfo() const override { return 2; }
-
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4438f50..56f3498 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -330,22 +330,19 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- const unsigned NumEntries = array_lengthof(NEONLdStTable);
-
#ifndef NDEBUG
// Make sure the table is sorted.
static bool TableChecked = false;
if (!TableChecked) {
- for (unsigned i = 0; i != NumEntries-1; ++i)
- assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
- "NEONLdStTable is not sorted!");
+ assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
+ "NEONLdStTable is not sorted!");
TableChecked = true;
}
#endif
- const NEONLdStTableEntry *I =
- std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
- if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+ auto I = std::lower_bound(std::begin(NEONLdStTable),
+ std::end(NEONLdStTable), Opcode);
+ if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
return I;
return nullptr;
}
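The rewritten LookupNEONLdSt above swaps the manual sortedness loop and pointer arithmetic for std::is_sorted, std::begin/std::end, and std::lower_bound. A self-contained sketch of the same sorted-table lookup pattern; the Entry type and its comparison operators are assumptions that only mirror the shape of NEONLdStTableEntry:

  #include <algorithm>
  #include <cassert>
  #include <iterator>

  struct Entry {
    unsigned Key;
    unsigned Payload;
    bool operator<(const Entry &RHS) const { return Key < RHS.Key; }
    bool operator<(unsigned K) const { return Key < K; }
  };

  // A static table kept sorted by Key, like NEONLdStTable.
  static const Entry Table[] = {{1, 10}, {4, 40}, {9, 90}};

  const Entry *lookup(unsigned Key) {
    // Checked once in debug builds, exactly as the assert above does.
    assert(std::is_sorted(std::begin(Table), std::end(Table)));
    auto I = std::lower_bound(std::begin(Table), std::end(Table), Key);
    return (I != std::end(Table) && I->Key == Key) ? &*I : nullptr;
  }

  int main() { return lookup(4) ? 0 : 1; }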
@@ -734,7 +731,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16.addImm(Pred).addReg(PredReg);
if (RequiresBundling)
- finalizeBundle(MBB, &*LO16, &*MBBI);
+ finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
@@ -747,6 +744,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
switch (Opcode) {
default:
return false;
+
+ case ARM::TCRETURNdi:
+ case ARM::TCRETURNri: {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() &&
+ "Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl = MBBI->getDebugLoc();
+ const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+ MBB.getParent()->getSubtarget().getInstrInfo());
+
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode =
+ STI->isThumb()
+ ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
+ : ARM::TAILJMPd;
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+ if (JumpTarget.isGlobal())
+ MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ else {
+ assert(JumpTarget.isSymbol());
+ MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+ JumpTarget.getTargetFlags());
+ }
+
+ // Add the default predicate in Thumb mode.
+ if (STI->isThumb())
+ MIB.addImm(ARMCC::AL).addReg(0);
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+ .addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = std::prev(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ MBBI = NewMI;
+ return true;
+ }
case ARM::VMOVScc:
case ARM::VMOVDcc: {
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index fdd0763..9bdf823c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -922,12 +922,9 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
if (Addr.BaseType == Address::FrameIndexBase) {
int FI = Addr.Base.FI;
int Offset = Addr.Offset;
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI, Offset),
- Flags,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
@@ -1278,8 +1275,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1303,8 +1299,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const ConstantInt *CI =
@@ -1341,8 +1336,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
@@ -1355,8 +1349,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
TII.get(Opc)).addReg(AddrReg));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
- for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
- FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+ for (const BasicBlock *SuccBB : IB->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
return true;
}
@@ -1860,8 +1854,9 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
- } else
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ } else {
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
case CallingConv::ARM_AAPCS_VFP:
if (!isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
@@ -2944,48 +2939,51 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
unsigned Align, MVT VT) {
- bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
- ARMConstantPoolConstant *CPV =
- ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
- unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ LLVMContext *Context = &MF->getFunction()->getContext();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
+
+ unsigned ConstAlign =
+ MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+ unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
+
+ unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
+ unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
+ .addConstantPoolIndex(Idx);
+ if (Opc == ARM::LDRcp)
+ MIB.addImm(0);
+ AddDefaultPred(MIB);
- unsigned Opc;
- unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
- // Load value.
- if (isThumb2) {
- DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM::t2LDRpci), DestReg1)
- .addConstantPoolIndex(Idx));
- Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
- } else {
- // The extra immediate is for addrmode2.
- DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(ARM::LDRcp), DestReg1)
- .addConstantPoolIndex(Idx).addImm(0));
- Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
- }
+ // Fix the address by adding pc.
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
+ : ARM::PICADD;
+ DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(TempReg)
+ .addImm(ARMPCLabelIndex);
+ if (!Subtarget->isThumb())
+ AddDefaultPred(MIB);
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- if (GlobalBaseReg == 0) {
- GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
- AFI->setGlobalBaseReg(GlobalBaseReg);
+ if (UseGOT_PREL && Subtarget->isThumb()) {
+ unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(ARM::t2LDRi12), NewDestReg)
+ .addReg(DestReg)
+ .addImm(0);
+ DestReg = NewDestReg;
+ AddOptionalDefs(MIB);
}
-
- unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
- DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
- DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
- GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc), DestReg2)
- .addReg(DestReg1)
- .addReg(GlobalBaseReg);
- if (!UseGOTOFF)
- MIB.addImm(0);
- AddOptionalDefs(MIB);
-
- return DestReg2;
+ return DestReg;
}
bool ARMFastISel::fastLowerArguments() {
@@ -3038,7 +3036,7 @@ bool ARMFastISel::fastLowerArguments() {
}
- static const uint16_t GPRArgRegs[] = {
+ static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -3055,7 +3053,7 @@ bool ARMFastISel::fastLowerArguments() {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY),
ResultReg).addReg(DstReg, getKillRegState(true));
- updateValueMap(I, ResultReg);
+ updateValueMap(&*I, ResultReg);
}
return true;
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 6744000..c5990bb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// iOS requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetIOS())
+ if (STI.isTargetIOS() || STI.isTargetWatchOS())
return true;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -288,7 +289,6 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -305,7 +305,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Determine the sizes of each callee-save spill areas and record which frame
@@ -489,7 +493,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
// Adjust SP after all the callee-save spills.
- if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+ if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -689,60 +694,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
-// Resolve TCReturn pseudo-instruction
-void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl = MBBI->getDebugLoc();
- const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
- if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
- return;
-
- // Tail call return: adjust the stack pointer and jump to callee.
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
-
- // Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi) {
- unsigned TCOpcode = STI.isThumb() ?
- (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
- ARM::TAILJMPd;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
- if (JumpTarget.isGlobal())
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- else {
- assert(JumpTarget.isSymbol());
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
-
- // Add the default predicate in Thumb mode.
- if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
- } else if (RetOpcode == ARM::TCRETURNri) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
- addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- MachineInstr *NewMI = std::prev(MBBI);
- for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
- NewMI->addOperand(MBBI->getOperand(i));
-
- // Delete the pseudo instruction TCRETURN.
- MBB.erase(MBBI);
- MBBI = NewMI;
-}
-
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -758,10 +711,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// All calls are tail calls in GHC calling conv, and functions have no
// prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
- fixTCReturn(MF, MBB);
+ if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- }
+
+  // First put ourselves at the first (from the top) terminator instruction.
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
if (!AFI->hasStackFrame()) {
if (NumBytes - ArgRegsSaveSize != 0)
@@ -840,8 +795,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- fixTCReturn(MF, MBB);
-
if (ArgRegsSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
@@ -932,12 +885,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
return Offset;
}
-int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
-}
-
void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -950,7 +897,6 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
SmallVector<std::pair<unsigned,bool>, 4> Regs;
unsigned i = CSI.size();
@@ -1008,7 +954,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
// Put any subsequent vpush instructions before this one: they will refer to
// higher register numbers so need to be pushed first in order to preserve
// monotonicity.
- --MI;
+ if (MI != MBB.begin())
+ --MI;
}
}
@@ -1022,12 +969,20 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
- unsigned RetOpcode = MI->getOpcode();
- bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
- RetOpcode == ARM::TCRETURNri);
- bool isInterrupt =
- RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ DebugLoc DL;
+ bool isTailCall = false;
+ bool isInterrupt = false;
+ bool isTrap = false;
+ if (MBB.end() != MI) {
+ DL = MI->getDebugLoc();
+ unsigned RetOpcode = MI->getOpcode();
+ isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+ isInterrupt =
+ RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+ isTrap =
+ RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
+ RetOpcode == ARM::tTRAP;
+ }
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -1043,11 +998,14 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
- STI.hasV5TOps()) {
- Reg = ARM::PC;
- LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ !isTrap && STI.hasV5TOps()) {
+ if (MBB.succ_empty()) {
+ Reg = ARM::PC;
+ DeleteRet = true;
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+ } else
+ LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
// Fold the return instruction into the LDM.
- DeleteRet = true;
}
// If NoGap is true, pop consecutive registers and then leave the rest
@@ -1068,7 +1026,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
.addReg(ARM::SP));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
- if (DeleteRet) {
+ if (DeleteRet && MI != MBB.end()) {
MIB.copyImplicitOps(&*MI);
MI->eraseFromParent();
}
@@ -1095,7 +1053,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
// Put any subsequent vpop instructions after this one: they will refer to
// higher register numbers so need to be popped afterwards.
- ++MI;
+ if (MI != MBB.end())
+ ++MI;
}
}
@@ -1109,7 +1068,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
MachineFrameInfo &MFI = *MF.getFrameInfo();
@@ -1118,7 +1077,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// slot offsets can be wrong. The offset for d8 will always be correct.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned DNum = CSI[i].getReg() - ARM::D8;
- if (DNum >= 8)
+ if (DNum > NumAlignedDPRCS2Regs - 1)
continue;
int FI = CSI[i].getFrameIdx();
// The even-numbered registers will be 16-byte aligned, the odd-numbered
@@ -1269,7 +1228,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
// Find the frame index assigned to d8.
@@ -1654,13 +1613,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
- bool BigStack =
- (RS &&
- (MFI->estimateStackSize(MF) +
- ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
- estimateRSStackSizeLimit(MF, this)))
- || MFI->hasVarSizedObjects()
- || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ bool BigStack = (RS && (MFI->estimateStackSize(MF) +
+ ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
+ estimateRSStackSizeLimit(MF, this))) ||
+ MFI->hasVarSizedObjects() ||
+ (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
@@ -1698,8 +1655,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
unsigned Reg = UnspilledCS1GPRs[i];
- // Don't spill high register if the function is thumb
+      // Don't spill high registers if the function is thumb. In the case of
+      // Windows on ARM, accept R11 (the frame pointer).
if (!AFI->isThumbFunction() ||
+ (STI.isTargetWindows() && Reg == ARM::R11) ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
SavedRegs.set(Reg);
if (!MRI.isReserved(Reg))
@@ -1784,8 +1743,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned Align = getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
+ Amount = alignSPAdjust(Amount);
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
@@ -1885,7 +1843,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
report_fatal_error("Segmented stacks not supported on this platform.");
- assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
@@ -1913,21 +1870,48 @@ void ARMFrameLowering::adjustForSegmentedStacks(
MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
- for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
- e = PrologueMBB.livein_end();
- i != e; ++i) {
- AllocMBB->addLiveIn(*i);
- GetMBB->addLiveIn(*i);
- McrMBB->addLiveIn(*i);
- PrevStackMBB->addLiveIn(*i);
- PostStackMBB->addLiveIn(*i);
+  // Grab everything that reaches PrologueMBB to update their liveness as well.
+ SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+ SmallVector<MachineBasicBlock *, 2> WalkList;
+ WalkList.push_back(&PrologueMBB);
+
+ do {
+ MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+ for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+ if (BeforePrologueRegion.insert(PredBB).second)
+ WalkList.push_back(PredBB);
+ }
+ } while (!WalkList.empty());
+
+ // The order in that list is important.
+ // The blocks will all be inserted before PrologueMBB using that order.
+ // Therefore the block that should appear first in the CFG should appear
+ // first in the list.
+ MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+ PostStackMBB};
+
+ for (MachineBasicBlock *B : AddedBlocks)
+ BeforePrologueRegion.insert(B);
+
+ for (const auto &LI : PrologueMBB.liveins()) {
+ for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+ PredBB->addLiveIn(LI);
+ }
+
+ // Remove the newly added blocks from the list, since we know
+ // we do not have to do the following updates for them.
+ for (MachineBasicBlock *B : AddedBlocks) {
+ BeforePrologueRegion.erase(B);
+ MF.insert(PrologueMBB.getIterator(), B);
}
- MF.push_front(PostStackMBB);
- MF.push_front(AllocMBB);
- MF.push_front(GetMBB);
- MF.push_front(McrMBB);
- MF.push_front(PrevStackMBB);
+ for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+ // Make sure the LiveIns are still sorted and unique.
+ MBB->sortUniqueLiveIns();
+ // Replace the edges to PrologueMBB by edges to the sequences
+ // we are about to add.
+ MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+ }
// The required stack size that is aligned to ARM constant criterion.
AlignedStackSize = alignToARMConstant(StackSize);
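The segmented-stacks change above collects every block that can reach PrologueMBB with a simple worklist over predecessors before fixing up live-ins and CFG edges. A standalone sketch of that reverse walk on an assumed toy CFG, with integers standing in for basic blocks:

  #include <cstdio>
  #include <set>
  #include <vector>

  int main() {
    // predecessors[b] lists the predecessors of block b (assumed toy CFG).
    std::vector<std::vector<int>> predecessors = {{}, {0}, {0, 1}, {2}};
    int PrologueBB = 2;

    std::set<int> BeforePrologueRegion;
    std::vector<int> WalkList = {PrologueBB};
    do {
      int Cur = WalkList.back();
      WalkList.pop_back();
      // Only newly discovered predecessors are pushed, as insert() reports.
      for (int Pred : predecessors[Cur])
        if (BeforePrologueRegion.insert(Pred).second)
          WalkList.push_back(Pred);
    } while (!WalkList.empty());

    for (int BB : BeforePrologueRegion)
      std::printf("block %d reaches the prologue\n", BB); // prints 0 and 1
  }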
@@ -1991,7 +1975,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
MachineConstantPool *MCP = MF.getConstantPool();
- unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
+ unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
// ldr SR0, [pc, offset(STACK_LIMIT)]
AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
index 6fdc5ef..66f4dfb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -31,8 +31,6 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -52,7 +50,6 @@ public:
unsigned &FrameReg) const override;
int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg, int SPAdj) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
@@ -60,6 +57,11 @@ public:
void adjustForSegmentedStacks(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return true;
+ }
+
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b110628..0242440 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -160,11 +160,6 @@ public:
// Thumb Addressing Modes:
bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
- unsigned Scale);
- bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
- bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
SDValue &OffImm);
bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
@@ -176,8 +171,6 @@ public:
bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
// Thumb 2 Addressing Modes:
- bool SelectT2ShifterOperandReg(SDValue N,
- SDValue &BaseReg, SDValue &Opc);
bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
@@ -278,6 +271,22 @@ private:
// Get the alignment operand for a NEON VLD or VST instruction.
SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
bool is64BitVector);
+
+ /// Returns the number of instructions required to materialize the given
+ /// constant in a register, or 3 if a literal pool load is needed.
+ unsigned ConstantMaterializationCost(unsigned Val) const;
+
+ /// Checks if N is a multiplication by a constant where we can extract out a
+ /// power of two from the constant so that it can be used in a shift, but only
+ /// if it simplifies the materialization of the constant. Returns true if it
+ /// is, and assigns to PowerOfTwo the power of two that should be extracted
+ /// out and to NewMulConst the new constant to be multiplied by.
+ bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
+ unsigned &PowerOfTwo, SDValue &NewMulConst) const;
+
+ /// Replace N with M in CurDAG, in a way that also ensures that M gets
+ /// selected when N would have been selected.
+ void replaceDAGValue(const SDValue &N, SDValue M);
};
}
@@ -334,7 +343,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
bool isThumb2 = Subtarget->isThumb();
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+ SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
if (N->getOpcode() != ISD::ADD)
continue;
@@ -388,7 +397,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
SDValue CPTmp1;
SDValue CPTmp2;
if (isThumb2) {
- if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+ if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
continue;
} else {
if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
@@ -471,6 +480,61 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
(ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
+unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
+ if (Subtarget->isThumb()) {
+ if (Val <= 255) return 1; // MOV
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (~Val <= 255) return 2; // MOV + MVN
+ if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1) return 1; // MOV
+ if (ARM_AM::getSOImmVal(~Val) != -1) return 1; // MVN
+ if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (ARM_AM::isSOImmTwoPartVal(Val)) return 2; // two instrs
+ }
+ if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
+ return 3; // Literal pool load
+}
+
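
To make the cost model above easier to follow, the sketch below mirrors its Thumb branch in standalone C++. This is illustrative only, not part of the patch or the LLVM API: ARM_AM::isThumbImmShiftedVal is replaced by a simple stand-in, and a v6T2 core (MOVW available) is assumed.

#include <cstdint>
#include <cstdio>

// Stand-in for ARM_AM::isThumbImmShiftedVal: an 8-bit value shifted left by
// some amount, i.e. materializable as MOV followed by LSL.
static bool isThumbImmShiftedVal(uint32_t Val) {
  for (unsigned Shift = 0; Shift <= 24; ++Shift)
    if ((Val & ~(0xFFu << Shift)) == 0)
      return true;
  return false;
}

// Mirrors the Thumb branch of ConstantMaterializationCost, assuming a v6T2
// core; 3 means a literal pool load would be needed.
static unsigned thumbConstantCost(uint32_t Val, bool HasMovt) {
  if (Val <= 255) return 1;                 // MOV
  if (Val <= 0xffff) return 1;              // MOVW
  if (~Val <= 255) return 2;                // MOV + MVN
  if (isThumbImmShiftedVal(Val)) return 2;  // MOV + LSL
  return HasMovt ? 2 : 3;                   // MOVW + MOVT, else literal pool
}

int main() {
  for (uint32_t V : {42u, 0x1234u, 0xFFFFFF00u, 0x00FF0000u, 0x12345678u})
    std::printf("cost(0x%08X) = %u\n", (unsigned)V,
                thumbConstantCost(V, /*HasMovt=*/false));
  return 0;
}

Run as-is, this reports cost 1 for MOV/MOVW-encodable values, 2 for MVN- or shift-encodable ones, and 3 only when a literal pool load is required.
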
+bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
+ unsigned MaxShift,
+ unsigned &PowerOfTwo,
+ SDValue &NewMulConst) const {
+ assert(N.getOpcode() == ISD::MUL);
+ assert(MaxShift > 0);
+
+ // If the multiply is used in more than one place then changing the constant
+ // will make other uses incorrect, so don't.
+ if (!N.hasOneUse()) return false;
+ // Check if the multiply is by a constant
+ ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!MulConst) return false;
+ // If the constant is used in more than one place then modifying it will mean
+ // we need to materialize two constants instead of one, which is a bad idea.
+ if (!MulConst->hasOneUse()) return false;
+ unsigned MulConstVal = MulConst->getZExtValue();
+ if (MulConstVal == 0) return false;
+
+ // Find the largest power of 2 that MulConstVal is a multiple of
+ PowerOfTwo = MaxShift;
+ while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
+ --PowerOfTwo;
+ if (PowerOfTwo == 0) return false;
+ }
+
+ // Only optimise if the new cost is better
+ unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
+ NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
+ unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+ unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+ return NewCost < OldCost;
+}
+
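
Concretely, the splitting step turns a multiplier such as 40 into 5 with an extracted shift of 3 (x * 40 == (x * 5) << 3); the routine above then keeps the rewrite only if the reduced multiplier is strictly cheaper to materialize. The helper below is a hypothetical standalone sketch of just that splitting arithmetic, with the profitability comparison omitted.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <utility>

// Splits a non-zero multiplier C into (NewC, K) with C == NewC << K, where K
// is the largest power-of-two exponent dividing C, capped at MaxShift.
static std::pair<uint32_t, unsigned> splitMultiplier(uint32_t C,
                                                     unsigned MaxShift) {
  assert(C != 0 && MaxShift > 0 && MaxShift < 32);
  unsigned K = MaxShift;
  while (C % (1u << K) != 0)
    --K;                        // always terminates: C % 1 == 0 at K == 0
  return {C >> K, K};
}

int main() {
  auto [NewC, K] = splitMultiplier(40, 31); // 40 == 5 << 3
  std::printf("x * 40 == (x * %u) << %u\n", (unsigned)NewC, K);
  return 0;
}
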
+void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
+ CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
+ CurDAG->ReplaceAllUsesWith(N, M);
+}
+
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
SDValue &BaseReg,
SDValue &Opc,
@@ -478,6 +542,24 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
if (DisableShifterOp)
return false;
+ // If N is a multiply-by-constant and it's profitable to extract a shift and
+  // use it in a shifted operand, do so.
+ if (N.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
+ BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
+ N.getOperand(0), NewMulConst)
+ .getNode()),
+ 0);
+ replaceDAGValue(N.getOperand(1), NewMulConst);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
+ PowerOfTwo),
+ SDLoc(N), MVT::i32);
+ return true;
+ }
+ }
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
// Don't match base register only case. That is matched to a separate
@@ -662,6 +744,18 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
}
}
+ // If Offset is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand, do so.
+ if (Offset.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(Offset.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ ShOpcVal = ARM_AM::lsl;
+ }
+ }
+
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
SDLoc(N), MVT::i32);
return true;
@@ -1086,77 +1180,13 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
}
bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
- SDValue &Offset, unsigned Scale) {
- if (Scale == 4) {
- SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
- return false; // We want to select tLDRspi / tSTRspi instead.
-
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return false; // We want to select tLDRpci instead.
- }
-
- if (!CurDAG->isBaseWithConstantOffset(N))
- return false;
-
- // Thumb does not have [sp, r] address mode.
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
- if ((LHSR && LHSR->getReg() == ARM::SP) ||
- (RHSR && RHSR->getReg() == ARM::SP))
- return false;
-
- // FIXME: Why do we explicitly check for a match here and then return false?
- // Presumably to allow something else to match, but shouldn't this be
- // documented?
- int RHSC;
- if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
- return false;
-
- Base = N.getOperand(0);
- Offset = N.getOperand(1);
- return true;
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 1);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 2);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
- SDValue &Base,
- SDValue &Offset) {
- return SelectThumbAddrModeRI(N, Base, Offset, 4);
-}
-
-bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
SDValue &Base, SDValue &OffImm) {
- if (Scale == 4) {
- SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
- return false; // We want to select tLDRspi / tSTRspi instead.
-
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return false; // We want to select tLDRpci instead.
- }
-
if (!CurDAG->isBaseWithConstantOffset(N)) {
- if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ if (N.getOpcode() == ISD::ADD) {
+ return false; // We want to select register offset instead
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@@ -1166,23 +1196,6 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return true;
}
- RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
- RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
- if ((LHSR && LHSR->getReg() == ARM::SP) ||
- (RHSR && RHSR->getReg() == ARM::SP)) {
- ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
- ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
- unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
- unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
-
- // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
- if (LHSC != 0 || RHSC != 0) return false;
-
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
- return true;
- }
-
// If the RHS is + imm5 * scale, fold into addr mode.
int RHSC;
if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
@@ -1191,9 +1204,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
return true;
}
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
- return true;
+ // Offset is too large, so use register offset instead.
+ return false;
}
bool
@@ -1263,28 +1275,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
//===----------------------------------------------------------------------===//
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
- SDValue &Opc) {
- if (DisableShifterOp)
- return false;
-
- ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
-
- // Don't match base register only case. That is matched to a separate
- // lower complexity pattern with explicit register operand.
- if (ShOpcVal == ARM_AM::no_shift) return false;
-
- BaseReg = N.getOperand(0);
- unsigned ShImmVal = 0;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- ShImmVal = RHS->getZExtValue() & 31;
- Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
- return true;
- }
-
- return false;
-}
-
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
@@ -1425,6 +1415,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
}
}
+ // If OffReg is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand, do so.
+ if (OffReg.getOpcode() == ISD::MUL) {
+ unsigned PowerOfTwo = 0;
+ SDValue NewMulConst;
+ if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+ replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+ ShAmt = PowerOfTwo;
+ }
+ }
+
ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
return true;
@@ -2503,25 +2504,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
- bool UseCP = true;
- if (Subtarget->useMovt(*MF))
- // Thumb2-aware targets have the MOVT instruction, so all immediates can
- // be done with MOV + MOVT, at worst.
- UseCP = false;
- else {
- if (Subtarget->isThumb()) {
- UseCP = (Val > 255 && // MOV
- ~Val > 255 && // MOV + MVN
- !ARM_AM::isThumbImmShiftedVal(Val) && // MOV + LSL
- !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
- } else
- UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV
- ARM_AM::getSOImmVal(~Val) == -1 && // MVN
- !ARM_AM::isSOImmTwoPartVal(Val) && // two instrs.
- !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
- }
-
- if (UseCP) {
+ // If we can't materialize the constant we need to use a literal pool
+ if (ConstantMaterializationCost(Val) > 2) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI->getPointerTy(CurDAG->getDataLayout()));
@@ -3376,7 +3360,7 @@ static void getIntOperandsFromRegisterString(StringRef RegString,
SelectionDAG *CurDAG, SDLoc DL,
std::vector<SDValue>& Ops) {
SmallVector<StringRef, 5> Fields;
- RegString.split(Fields, ":");
+ RegString.split(Fields, ':');
if (Fields.size() > 1) {
bool AllIntFields = true;
@@ -3461,9 +3445,9 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags) {
+static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
if (Flags.empty())
- return 0x3;
+ return 0x2 | (int)hasDSP;
return StringSwitch<int>(Flags)
.Case("g", 0x1)
@@ -3492,7 +3476,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
}
// We know we are now handling a write so need to get the mask for the flags.
- int Mask = getMClassFlagsMask(Flags);
+ int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
// Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
// shouldn't have flags present.
@@ -3501,7 +3485,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
// The _g and _nzcvqg versions are only valid if the DSP extension is
// available.
- if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
+ if (!Subtarget->hasDSP() && (Mask & 0x1))
return -1;
// The register was valid so need to put the mask in the correct place
@@ -3523,7 +3507,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// The flags permitted for apsr are the same flags that are allowed in
// M class registers. We get the flag value and then shift the flags into
// the correct place to combine with the mask.
- Mask = getMClassFlagsMask(Flags);
+ Mask = getMClassFlagsMask(Flags, true);
if (Mask == -1)
return -1;
return Mask << 2;
@@ -3742,7 +3726,7 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
}
SmallVector<StringRef, 5> Fields;
- StringRef(SpecialReg).split(Fields, "_", 1, false);
+ StringRef(SpecialReg).split(Fields, '_', 1, false);
std::string Reg = Fields[0].str();
StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
@@ -3943,6 +3927,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
// be an immediate and not a memory constraint.
// Fallthrough.
case InlineAsm::Constraint_m:
+ case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
case InlineAsm::Constraint_Um:
case InlineAsm::Constraint_Un:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8cc06df..9cfb06b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -142,6 +142,11 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
+
+ if (!VT.isFloatingPoint() &&
+ VT != MVT::v2i64 && VT != MVT::v1i64)
+ for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+ setOperationAction(Opcode, VT, Legal);
}
void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -166,77 +171,78 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
- // Single-precision floating-point arithmetic.
- setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
- setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
- setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
- setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
-
- // Double-precision floating-point arithmetic.
- setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
- setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
- setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
- setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
-
- // Single-precision comparisons.
- setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
- setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
- setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
- setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
- setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
- setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
- setLibcallName(RTLIB::UO_F32, "__unordsf2vfp");
- setLibcallName(RTLIB::O_F32, "__unordsf2vfp");
-
- setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
- setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
-
- // Double-precision comparisons.
- setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
- setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
- setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
- setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
- setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
- setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
- setLibcallName(RTLIB::UO_F64, "__unorddf2vfp");
- setLibcallName(RTLIB::O_F64, "__unorddf2vfp");
-
- setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
- setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
-
- // Floating-point to integer conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
-
- // Conversions between floating types.
- setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp");
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Single-precision floating-point arithmetic.
+ { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
+
+ // Double-precision floating-point arithmetic.
+ { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
+
+ // Single-precision comparisons.
+ { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
+ { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
+ { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
+
+ // Double-precision comparisons.
+ { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
+ { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
+ { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
+ { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
+ { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
+ { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
+ { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
+ { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // i64 conversions are done via library routines even when generating VFP
+ // instructions, so use the same ones.
+ // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+ // e.g., __floatunsidf vs. __floatunssidfvfp.
+ { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
- // Integer to floating-point conversions.
- // i64 conversions are done via library routines even when generating VFP
- // instructions, so use the same ones.
- // FIXME: There appears to be some naming inconsistency in ARM libgcc:
- // e.g., __floatunsidf vs. __floatunssidfvfp.
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+  // Set the correct calling convention for ARMv7k WatchOS: AAPCS_VFP is
+  // used even for functions as simple as libcalls.
+ if (Subtarget->isTargetWatchOS()) {
+ for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+ setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
}
}
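
The individual setLibcallName/setCmpLibcallCC calls are folded into one data table, so adding or conditionally registering a routine touches a single row. Below is a minimal standalone sketch of the same table-driven pattern, using illustrative enums and stubs rather than the real RTLIB/ISD definitions.

#include <cstdio>

enum class LC { AddF32, OeqF32 };
enum class CC { Invalid, SetNE };

static void setLibcallName(LC, const char *Name) { std::printf("name: %s\n", Name); }
static void setCmpLibcallCC(LC, CC) { std::puts("comparison CC set"); }

int main() {
  // One row per libcall; an Invalid condition code means "not a comparison,
  // skip setCmpLibcallCC", matching the shape of the table in the patch.
  static const struct {
    LC Op;
    const char *Name;
    CC Cond;
  } LibraryCalls[] = {
      {LC::AddF32, "__addsf3vfp", CC::Invalid},
      {LC::OeqF32, "__eqsf2vfp", CC::SetNE},
  };
  for (const auto &Entry : LibraryCalls) {
    setLibcallName(Entry.Op, Entry.Name);
    if (Entry.Cond != CC::Invalid)
      setCmpLibcallCC(Entry.Op, Entry.Cond);
  }
  return 0;
}
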
@@ -245,8 +251,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
- if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
- !Subtarget->isTargetWindows()) {
+ // RTLIB
+ if (Subtarget->isAAPCS_ABI() &&
+ (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+ Subtarget->isTargetAndroid())) {
static const struct {
const RTLIB::Libcall Op;
const char * const Name;
@@ -334,12 +342,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
{ RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
- // Memory operations
- // RTABI chapter 4.3.4
- { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
- { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
};
for (const auto &LC : LibraryCalls) {
@@ -348,6 +350,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (LC.Cond != ISD::SETCC_INVALID)
setCmpLibcallCC(LC.Op, LC.Cond);
}
+
+ // EABI dependent RTLIB
+ if (TM.Options.EABIVersion == EABI::EABI4 ||
+ TM.Options.EABIVersion == EABI::EABI5) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char *const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } MemOpsLibraryCalls[] = {
+ // Memory operations
+ // RTABI chapter 4.3.4
+ { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : MemOpsLibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
}
if (Subtarget->isTargetWindows()) {
@@ -364,6 +390,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
{ RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
{ RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I32, "__rt_sdiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I32, "__rt_udiv", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
};
for (const auto &LC : LibraryCalls) {
@@ -373,8 +403,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
- if (Subtarget->getTargetTriple().isiOS() &&
- !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+ if (Subtarget->isTargetWatchOS() ||
+ (Subtarget->isTargetIOS() &&
+ !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
}
@@ -392,6 +423,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
}
+ // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
+ // a __gnu_ prefix (which is the default).
+ if (Subtarget->isTargetAEABI()) {
+ setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
+ setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
+ setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+ }
+
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
@@ -579,7 +618,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
@@ -605,7 +643,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ADDC);
if (Subtarget->isFPOnlySP()) {
- // When targetting a floating-point unit with only single-precision
+ // When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
    // are present. However, no double-precision operations other than moves,
// loads and stores are provided by the hardware.
@@ -689,7 +727,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
- || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+ || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -706,8 +744,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
// ARM does not have ROTL.
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ for (MVT VT : MVT::vector_valuetypes()) {
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
+ }
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
@@ -717,7 +762,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
+ // @llvm.readcyclecounter requires the Performance Monitors extension.
+ // Default to the 0 expansion on unsupported platforms.
+ // FIXME: Technically there are older ARM CPUs that have
+ // implementation-specific ways of obtaining this information.
+ if (Subtarget->hasPerfMon())
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
@@ -726,15 +776,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
!(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
// These are expanded into libcalls if the cpu doesn't have HW divider.
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, LibCall);
+ setOperationAction(ISD::UDIV, MVT::i32, LibCall);
}
- // FIXME: Also set divmod for SREM on EABI
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
- if (Subtarget->isTargetAEABI()) {
+ if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) {
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+
setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
@@ -762,7 +814,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
@@ -776,13 +827,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (!Subtarget->isTargetMachO()) {
- // Non-MachO platforms may return values in these registers via the
- // personality function.
- setExceptionPointerRegister(ARM::R0);
- setExceptionSelectorRegister(ARM::R1);
- }
-
if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
@@ -849,11 +893,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- if (Subtarget->isTargetDarwin()) {
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
+ if (Subtarget->useSjLjEH())
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
- }
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -912,7 +956,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
setLibcallName(RTLIB::SINCOS_F64, "sincos");
- if (Subtarget->getTargetTriple().isiOS()) {
+ if (Subtarget->isTargetWatchOS()) {
+ setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
+ setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
+ }
+ if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
      // For iOS, we don't want the normal expansion of a libcall to
// sincos. We want to issue a libcall to __sincos_stret.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
@@ -928,6 +976,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
+
if (!Subtarget->isFPOnlySP()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
@@ -935,8 +990,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
}
}
+
+ if (Subtarget->hasNEON()) {
+ // vmin and vmax aren't available in a scalar form, so we use
+ // a NEON instruction with an undef lane instead.
+ setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
+ setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
+ }
+
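
The distinction between the two node families registered here matters: FMINNUM/FMAXNUM follow IEEE-754 minNum/maxNum and drop a quiet NaN operand, while FMINNAN/FMAXNAN match the NEON vmin/vmax behaviour of propagating a NaN. The scalar models below are a hypothetical illustration of that difference, not LLVM code.

#include <cmath>
#include <cstdio>

// IEEE-754 minNum: a NaN operand is dropped in favour of the number.
static float fminNum(float A, float B) {
  if (std::isnan(A)) return B;
  if (std::isnan(B)) return A;
  return A < B ? A : B;
}

// NEON-style vmin: any NaN operand makes the result NaN.
static float fminNaN(float A, float B) {
  if (std::isnan(A) || std::isnan(B)) return NAN;
  return A < B ? A : B;
}

int main() {
  std::printf("minNum(NaN, 1) = %f, minNaN(NaN, 1) = %f\n",
              fminNum(NAN, 1.0f), fminNaN(NAN, 1.0f));
  return 0;
}
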
// We have target-specific dag combine patterns for the following nodes:
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
@@ -959,11 +1028,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
//// temporary - rewrite interface to use type
MaxStoresPerMemset = 8;
- MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemsetOptSize = 4;
MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
- MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemcpyOptSize = 2;
MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
- MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmoveOptSize = 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
@@ -1054,8 +1123,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMOV: return "ARMISD::CMOV";
- case ARMISD::RBIT: return "ARMISD::RBIT";
-
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -1069,7 +1136,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
- case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
@@ -1082,6 +1150,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
+ case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
@@ -1133,14 +1202,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
- case ARMISD::FMAX: return "ARMISD::FMAX";
- case ARMISD::FMIN: return "ARMISD::FMIN";
- case ARMISD::VMAXNM: return "ARMISD::VMAX";
- case ARMISD::VMINNM: return "ARMISD::VMIN";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
+ case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
@@ -1449,9 +1515,10 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, PtrOff);
- return DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(LocMemOffset),
- false, false, 0);
+ return DAG.getStore(
+ Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ false, false, 0);
}
void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
@@ -1734,9 +1801,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1748,9 +1816,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get the address of the callee into a register
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
@@ -1768,7 +1837,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(), false, false, true, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, true, 0);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -1781,7 +1851,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee =
DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
@@ -1804,9 +1875,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(), false, false,
- false, 0);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
} else {
@@ -1821,7 +1893,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -1831,8 +1902,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!isDirect && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
- // Emit regular call when code size is the priority
- !HasMinSizeAttr)
+ // Emit regular call when code size is the priority
+ !MF.getFunction()->optForMinSize())
// "mov lr, pc; b _foo" to avoid confusing the RSP
CallOpc = ARMISD::CALL_NOLINK;
else
@@ -2014,6 +2085,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
+ assert(Subtarget->supportsTailCall());
+
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
@@ -2033,26 +2106,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
- // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
- // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
- // support in the assembler and linker to be used. This would need to be
- // fixed to fully support tail calls in Thumb1.
- //
- // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
- // LR. This means if we need to reload LR, it takes an extra instructions,
- // which outweighs the value of the tail call; but here we don't know yet
- // whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
- // emitEpilogue if LR is used.
-
- // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
- // but we need to make sure there are enough registers; the only valid
- // registers are the 4 used for parameters. We don't currently do this
- // case.
- if (Subtarget->isThumb1Only())
- return false;
-
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
@@ -2400,7 +2453,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
if (!CI->isTailCall() || Attr.getValueAsString() == "true")
return false;
- return !Subtarget->isThumb1Only();
+ return true;
}
// Trying to write a 64 bit value so need to split into two 32 bit values first,
@@ -2467,9 +2520,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result =
+ DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
@@ -2491,9 +2545,10 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
- Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Argument =
+ DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2543,17 +2598,19 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
true);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
} else {
// local exec model
assert(model == TLSModel::LocalExec);
@@ -2561,9 +2618,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
- Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, dl, Chain, Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -2577,6 +2635,8 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
"TLS not implemented for non-ELF targets");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -2597,22 +2657,31 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV,
- UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+ bool UseGOT_PREL =
+ !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDLoc dl(Op);
+ unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+ GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+ UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+ /*AddCurrentAddress=*/UseGOT_PREL);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
- CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
SDValue Chain = Result.getValue(1);
- SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
- Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
- if (!UseGOTOFF)
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
+ Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+ if (UseGOT_PREL)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
return Result;
}
@@ -2628,9 +2697,10 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ return DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
}
}
@@ -2654,7 +2724,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
@@ -2680,32 +2751,11 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
TargetFlags));
if (GV->hasDLLImportStorageClass())
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
-SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Subtarget->isTargetELF() &&
- "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- SDLoc dl(Op);
- unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV =
- ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
- ARMPCLabelIndex, PCAdj);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
- return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
-}
-
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -2722,6 +2772,13 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
}
+SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
+ Op.getOperand(0));
+}
+
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const {
@@ -2732,7 +2789,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
case Intrinsic::arm_rbit: {
assert(Op.getOperand(1).getValueType() == MVT::i32 &&
"RBIT intrinsic must have i32 type!");
- return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1));
+ return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1));
}
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2752,10 +2809,10 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
ARMCP::CPLSDA, PCAdj);
CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue Result =
- DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ SDValue Result = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
@@ -2770,6 +2827,36 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::arm_neon_vminnm:
+ case Intrinsic::arm_neon_vmaxnm: {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
+ ? ISD::FMINNUM : ISD::FMAXNUM;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vminu:
+ case Intrinsic::arm_neon_vmaxu: {
+ if (Op.getValueType().isFloatingPoint())
+ return SDValue();
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
+ ? ISD::UMIN : ISD::UMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ case Intrinsic::arm_neon_vmins:
+ case Intrinsic::arm_neon_vmaxs: {
+ // v{min,max}s is overloaded between signed integers and floats.
+ if (!Op.getValueType().isFloatingPoint()) {
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::SMIN : ISD::SMAX;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
+ unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
+ ? ISD::FMINNAN : ISD::FMAXNAN;
+ return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
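
A compact way to read the new intrinsic cases: each NEON min/max intrinsic is rewritten to a generic node, with the signed variants dispatching on whether the operands are floating point. The sketch below is a hypothetical standalone restatement of that mapping, not the lowering code itself.

#include <optional>
#include <string>

enum class Node { UMin, UMax, SMin, SMax, FMinNaN, FMaxNaN, FMinNum, FMaxNum };

// Returns the generic node a NEON min/max intrinsic lowers to, or nullopt if
// no rewrite applies (e.g. vminu/vmaxu on a floating-point type).
static std::optional<Node> lowerMinMax(const std::string &Intrin, bool IsFP) {
  if (Intrin == "vminu" || Intrin == "vmaxu") {
    if (IsFP) return std::nullopt;
    return Intrin == "vminu" ? Node::UMin : Node::UMax;
  }
  if (Intrin == "vmins" || Intrin == "vmaxs") {
    if (IsFP) return Intrin == "vmins" ? Node::FMinNaN : Node::FMaxNaN;
    return Intrin == "vmins" ? Node::SMin : Node::SMax;
  }
  if (Intrin == "vminnm") return Node::FMinNum;
  if (Intrin == "vmaxnm") return Node::FMaxNum;
  return std::nullopt;
}

int main() {
  return lowerMinMax("vmins", /*IsFP=*/true) == Node::FMinNaN ? 0 : 1;
}
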
@@ -2870,9 +2957,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(
+ MVT::i32, dl, Root, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -3056,9 +3144,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isMemLoc()) {
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ ArgValue2 = DAG.getLoad(
+ MVT::f64, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -3139,9 +3228,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
- int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg,
- CurByValIndex, VA.getLocMemOffset(),
- Flags.getByValSize());
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
+ VA.getLocMemOffset(), Flags.getByValSize());
InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
CCInfo.nextInRegsParam();
} else {
@@ -3151,9 +3240,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0));
+ InVals.push_back(DAG.getLoad(
+ VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0));
}
lastInsIndex = index;
}
@@ -3188,13 +3278,9 @@ static bool isFloatingPointZero(SDValue Op) {
// Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
// created by LowerConstantFP().
SDValue BitcastOp = Op->getOperand(0);
- if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) {
- SDValue MoveOp = BitcastOp->getOperand(0);
- if (MoveOp->getOpcode() == ISD::TargetConstant &&
- cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) {
- return true;
- }
- }
+ if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
+ isNullConstant(BitcastOp->getOperand(0)))
+ return true;
}
return false;
}
@@ -3559,113 +3645,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
- // We can use VMAXNM/VMINNM for a compare followed by a select with the
- // same operands, as follows:
- // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
- // select c, a, b
- // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
- bool swapSides = false;
- if (!getTargetMachine().Options.NoNaNsFPMath) {
- // transformability may depend on which way around we compare
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- // the non-NaN should be RHS
- swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS);
- break;
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETULT:
- case ISD::SETULE:
- // the non-NaN should be LHS
- swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS);
- break;
- }
- }
- swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal);
- if (swapSides) {
- CC = ISD::getSetCCSwappedOperands(CC);
- std::swap(LHS, RHS);
- }
- if (LHS == TrueVal && RHS == FalseVal) {
- bool canTransform = true;
- // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here
- if (!getTargetMachine().Options.UnsafeFPMath &&
- !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
- const ConstantFPSDNode *Zero;
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETUGT:
- case ISD::SETGT:
- // RHS must not be -0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
- !Zero->isNegative();
- break;
- case ISD::SETOGE:
- case ISD::SETUGE:
- case ISD::SETGE:
- // LHS must not be -0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
- !Zero->isNegative();
- break;
- case ISD::SETOLT:
- case ISD::SETULT:
- case ISD::SETLT:
- // RHS must not be +0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(RHS)) &&
- Zero->isNegative();
- break;
- case ISD::SETOLE:
- case ISD::SETULE:
- case ISD::SETLE:
- // LHS must not be +0
- canTransform = (Zero = dyn_cast<ConstantFPSDNode>(LHS)) &&
- Zero->isNegative();
- break;
- }
- }
- if (canTransform) {
- // Note: If one of the elements in a pair is a number and the other
- // element is NaN, the corresponding result element is the number.
- // This is consistent with the IEEE 754-2008 standard.
- // Therefore, a > b ? a : b <=> vmax(a,b), if b is constant and a is NaN
- switch (CC) {
- default:
- break;
- case ISD::SETOGT:
- case ISD::SETOGE:
- if (!DAG.isKnownNeverNaN(RHS))
- break;
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
- case ISD::SETUGT:
- case ISD::SETUGE:
- if (!DAG.isKnownNeverNaN(LHS))
- break;
- case ISD::SETGT:
- case ISD::SETGE:
- return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS);
- case ISD::SETOLT:
- case ISD::SETOLE:
- if (!DAG.isKnownNeverNaN(RHS))
- break;
- return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
- case ISD::SETULT:
- case ISD::SETULE:
- if (!DAG.isKnownNeverNaN(LHS))
- break;
- case ISD::SETLT:
- case ISD::SETLE:
- return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS);
- }
- }
- }
-
bool swpCmpOps = false;
bool swpVselOps = false;
checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
@@ -3890,16 +3869,18 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
Addr, Op.getOperand(2), JTI);
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
- Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(),
- false, false, false, 0);
+ Addr =
+ DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
} else {
- Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(),
- false, false, false, 0);
+ Addr =
+ DAG.getLoad(PTy, dl, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ false, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
}
@@ -3936,7 +3917,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
Op.getValueType());
- return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
@@ -3988,7 +3969,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
Op.getValueType());
- return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1,
+ return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
/*isSigned*/ false, SDLoc(Op)).first;
}
@@ -4153,6 +4134,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
Results.push_back(Read.getOperand(0));
}
+/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
+/// When \p DstVT, the destination type of \p BC, is on the vector
+/// register bank and the source of bitcast, \p Op, operates on the same bank,
+/// it might be possible to combine them, such that everything stays on the
+/// vector register bank.
+/// \return The node that would replace \p BC, if the combine
+/// is possible.
+static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
+ SelectionDAG &DAG) {
+ SDValue Op = BC->getOperand(0);
+ EVT DstVT = BC->getValueType(0);
+
+ // The only vector instruction that can produce a scalar (remember,
+ // since the bitcast was about to be turned into VMOVDRR, the source
+ // type is i64) from a vector is EXTRACT_VECTOR_ELT.
+ // Moreover, we can do this combine only if there is one use.
+  // Finally, if the destination type is not a vector, there is not
+  // much point in forcing everything onto the vector bank.
+ if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !Op.hasOneUse())
+ return SDValue();
+
+ // If the index is not constant, we will introduce an additional
+ // multiply that will stick.
+ // Give up in that case.
+ ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Index)
+ return SDValue();
+ unsigned DstNumElt = DstVT.getVectorNumElements();
+
+ // Compute the new index.
+ const APInt &APIntIndex = Index->getAPIntValue();
+ APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
+ NewIndex *= APIntIndex;
+ // Check if the new constant index fits into i32.
+ if (NewIndex.getBitWidth() > 32)
+ return SDValue();
+
+ // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
+ // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
+ SDLoc dl(Op);
+ SDValue ExtractSrc = Op.getOperand(0);
+ EVT VecVT = EVT::getVectorVT(
+ *DAG.getContext(), DstVT.getScalarType(),
+ ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
+ SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
+ DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
+}
+
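
A rough, standalone sketch of the index arithmetic above (illustrative values only, not LLVM API code): the i64 lane selected by the EXTRACT_VECTOR_ELT covers DstNumElt lanes of the destination element type, so the subvector starts at Index * DstNumElt.

#include <cassert>
#include <cstdint>

// Simplified model of the index rewrite in CombineVMOVDRRCandidateWithVecOp:
// extracting i64 lane 'Index' and bitcasting it to DstNumElt small lanes is
// the same as extracting a DstNumElt-wide subvector starting at
// Index * DstNumElt from the bitcast of the whole source vector.
static uint64_t newSubvectorIndex(uint64_t Index, unsigned DstNumElt) {
  return Index * DstNumElt;
}

int main() {
  assert(newSubvectorIndex(1, 2) == 2);  // v2f32 destination, i64 lane 1
  assert(newSubvectorIndex(3, 4) == 12); // v4i16 destination, i64 lane 3
  return 0;
}
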
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -4172,6 +4203,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn i64->f64 into VMOVDRR.
if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
+ // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
+ // if we can combine the bitcast with its source.
+ if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
+ return Val;
+
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
@@ -4383,7 +4419,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
if (!ST->hasV6T2Ops())
return SDValue();
- SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+ SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}
@@ -4544,8 +4580,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
"Unknown shift to lower!");
// We only lower SRA, SRL of 1 here, all others use generic lowering.
- if (!isa<ConstantSDNode>(N->getOperand(1)) ||
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+ if (!isOneConstant(N->getOperand(1)))
return SDValue();
// If we are in thumb mode, we don't have RRX.
@@ -5036,18 +5071,56 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
return VT == MVT::v8i8 && M.size() == 8;
}
+// Checks whether the shuffle mask represents a vector transpose (VTRN) by
+// checking that pairs of elements in the shuffle mask represent the same index
+// in each vector, incrementing the expected index by 2 at each step.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
+// v2={e,f,g,h}
+// WhichResult gives the offset for each element in the mask based on which
+// of the two results it belongs to.
+//
+// The transpose can be represented either as:
+// result1 = shufflevector v1, v2, result1_shuffle_mask
+// result2 = shufflevector v1, v2, result2_shuffle_mask
+// where v1/v2 and the shuffle masks have the same number of elements
+// (here WhichResult (see below) indicates which result is being checked)
+//
+// or as:
+// results = shufflevector v1, v2, shuffle_mask
+// where both results are returned in one vector and the shuffle mask has twice
+// as many elements as v1/v2 (here WhichResult will always be 0 if true). In
+// this case we check the low half and the high half of the shuffle mask as if
+// they were the masks of the two results in the first form.
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ // If the mask is twice as long as the input vector then we need to check the
+ // upper and lower parts of the mask with a matching value for WhichResult
+ // FIXME: A mask with only even values will be rejected in case the first
+ // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
+ // M[0] is used to determine WhichResult
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
+ return false;
+ }
}
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
return true;
}
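
For reference, a minimal standalone sketch of the transpose-mask rule described above (illustrative only; it ignores undef lanes and the double-length-mask handling):

#include <cassert>
#include <vector>

// Simplified version of the VTRN check: lane pairs (j, j+1) of result
// WhichResult read source lanes j + WhichResult and j + NumElts + WhichResult.
static bool isTransposeMask(const std::vector<int> &M, unsigned NumElts,
                            unsigned WhichResult) {
  for (unsigned j = 0; j < NumElts; j += 2)
    if ((unsigned)M[j] != j + WhichResult ||
        (unsigned)M[j + 1] != j + NumElts + WhichResult)
      return false;
  return true;
}

int main() {
  // v4i32: lanes {a,b,c,d} and {e,f,g,h} -> {a,e,c,g} is result 0 of VTRN.
  assert(isTransposeMask({0, 4, 2, 6}, 4, 0));
  // {b,f,d,h} is result 1.
  assert(isTransposeMask({1, 5, 3, 7}, 4, 1));
  return 0;
}
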
@@ -5060,28 +5133,55 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ if (M.size() == NumElts * 2)
+ WhichResult = i / NumElts;
+ else
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
+ return false;
+ }
}
+
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
return true;
}
+// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
+// that the mask elements are either all even and in steps of size 2 or all odd
+// and in steps of size 2.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
+// v2={e,f,g,h}
+// Requires checks similar to those of isVTRNMask with respect to how the
+// results are returned.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (M[i] < 0) continue; // ignore UNDEF indices
- if ((unsigned) M[i] != 2 * i + WhichResult)
- return false;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; ++j) {
+ if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
+ return false;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5097,18 +5197,27 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
if (EltSz == 64)
return false;
- unsigned Half = VT.getVectorNumElements() / 2;
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned j = 0; j != 2; ++j) {
- unsigned Idx = WhichResult;
- for (unsigned i = 0; i != Half; ++i) {
- int MIdx = M[i + j * Half];
- if (MIdx >= 0 && (unsigned) MIdx != Idx)
- return false;
- Idx += 2;
+ unsigned NumElts = VT.getVectorNumElements();
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ unsigned Half = NumElts / 2;
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ for (unsigned j = 0; j < NumElts; j += Half) {
+ unsigned Idx = WhichResult;
+ for (unsigned k = 0; k < Half; ++k) {
+ int MIdx = M[i + j + k];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
+ return false;
+ Idx += 2;
+ }
}
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5116,21 +5225,37 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
+// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
+// that pairs of elements of the shuffle mask represent the same index in each
+// vector, incrementing sequentially through the vectors.
+// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
+// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
+// v2={e,f,g,h}
+// Requires checks similar to those of isVTRNMask with respect to how the
+// results are returned.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
- return false;
- Idx += 1;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
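
To make the unzip/zip rules above concrete, a standalone sketch computing the single-result masks for a v4i32 shuffle (assumed, illustrative values only):

#include <cassert>

int main() {
  const unsigned NumElts = 4; // v4i32
  // VUZP: lane j of result r reads source lane 2*j + r, so result 1 is
  // [1, 3, 5, 7].
  int Uzp1[4];
  for (unsigned j = 0; j < NumElts; ++j)
    Uzp1[j] = 2 * j + 1;
  assert(Uzp1[0] == 1 && Uzp1[1] == 3 && Uzp1[2] == 5 && Uzp1[3] == 7);
  // VZIP: result r starts at Idx = r * NumElts / 2 and interleaves the two
  // sources: [Idx, Idx + NumElts, Idx + 1, Idx + NumElts + 1, ...],
  // so result 1 is [2, 6, 3, 7].
  int Zip1[4], Zip1Expected[4] = {2, 6, 3, 7};
  unsigned Idx = 1 * NumElts / 2;
  for (unsigned j = 0; j < NumElts; j += 2, ++Idx) {
    Zip1[j] = Idx;
    Zip1[j + 1] = Idx + NumElts;
  }
  for (unsigned j = 0; j < NumElts; ++j)
    assert(Zip1[j] == Zip1Expected[j]);
  return 0;
}
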
@@ -5147,15 +5272,23 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
- (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
- return false;
- Idx += 1;
+ if (M.size() != NumElts && M.size() != NumElts*2)
+ return false;
+
+ for (unsigned i = 0; i < M.size(); i += NumElts) {
+ WhichResult = M[i] == 0 ? 0 : 1;
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned j = 0; j < NumElts; j += 2) {
+ if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
+ (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
+ return false;
+ Idx += 1;
+ }
}
+ if (M.size() == NumElts*2)
+ WhichResult = 0;
+
// VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
if (VT.is64BitVector() && EltSz == 32)
return false;
@@ -5329,16 +5462,14 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// just use VDUPLANE. We can only do this if the lane being extracted
// is at a constant index, as the VDUP from lane instructions only have
// constant-index forms.
+ ConstantSDNode *constIndex;
if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(Value->getOperand(1))) {
+ (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
// We need to create a new undef vector to use for the VDUPLANE if the
// size of the vector from which we get the value is different than the
// size of the vector that we need to create. We will insert the element
// such that the register coalescer will remove unnecessary copies.
if (VT != Value->getOperand(0).getValueType()) {
- ConstantSDNode *constIndex;
- constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
- assert(constIndex && "The index is not a constant!");
unsigned index = constIndex->getAPIntValue().getLimitedValue() %
VT.getVectorNumElements();
N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
@@ -5437,14 +5568,35 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// shuffle in combination with VEXTs.
SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
- SmallVector<SDValue, 2> SourceVecs;
- SmallVector<unsigned, 2> MinElts;
- SmallVector<unsigned, 2> MaxElts;
+ struct ShuffleSourceInfo {
+ SDValue Vec;
+ unsigned MinElt;
+ unsigned MaxElt;
+
+ // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
+ // be compatible with the shuffle we intend to construct. As a result
+ // ShuffleVec will be some sliding window into the original Vec.
+ SDValue ShuffleVec;
+
+    // Code should guarantee that element i in Vec starts at element
+    // "WindowBase + i * WindowScale" in ShuffleVec.
+ int WindowBase;
+ int WindowScale;
+
+ bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
+ ShuffleSourceInfo(SDValue Vec)
+ : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
+ WindowScale(1) {}
+ };
+ // First gather all vectors used as an immediate source for this BUILD_VECTOR
+ // node.
+ SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
@@ -5453,127 +5605,166 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// A shuffle can only come from building a vector from various
// elements of other vectors.
return SDValue();
- } else if (V.getOperand(0).getValueType().getVectorElementType() !=
- VT.getVectorElementType()) {
- // This code doesn't know how to handle shuffles where the vector
- // element types do not match (this happens because type legalization
- // promotes the return type of EXTRACT_VECTOR_ELT).
- // FIXME: It might be appropriate to extend this code to handle
- // mismatched types.
+ } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
+ // Furthermore, shuffles require a constant mask, whereas extractelts
+ // accept variable indices.
return SDValue();
}
- // Record this extraction against the appropriate vector if possible...
+ // Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- // If the element number isn't a constant, we can't effectively
- // analyze what's going on.
- if (!isa<ConstantSDNode>(V.getOperand(1)))
- return SDValue();
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
- bool FoundSource = false;
- for (unsigned j = 0; j < SourceVecs.size(); ++j) {
- if (SourceVecs[j] == SourceVec) {
- if (MinElts[j] > EltNo)
- MinElts[j] = EltNo;
- if (MaxElts[j] < EltNo)
- MaxElts[j] = EltNo;
- FoundSource = true;
- break;
- }
- }
+ auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ if (Source == Sources.end())
+ Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
- // Or record a new source if not...
- if (!FoundSource) {
- SourceVecs.push_back(SourceVec);
- MinElts.push_back(EltNo);
- MaxElts.push_back(EltNo);
- }
+ // Update the minimum and maximum lane number seen.
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ Source->MinElt = std::min(Source->MinElt, EltNo);
+ Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// Currently only do something sane when at most two source vectors
- // involved.
- if (SourceVecs.size() > 2)
+ // are involved.
+ if (Sources.size() > 2)
return SDValue();
- SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
- int VEXTOffsets[2] = {0, 0};
+  // Find out the smallest element size among the result and the two sources,
+  // and use it as the element size to build the shuffle_vector.
+ EVT SmallestEltTy = VT.getVectorElementType();
+ for (auto &Source : Sources) {
+ EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
+ if (SrcEltTy.bitsLT(SmallestEltTy))
+ SmallestEltTy = SrcEltTy;
+ }
+ unsigned ResMultiplier =
+ VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
+
+ // If the source vector is too wide or too narrow, we may nevertheless be able
+ // to construct a compatible shuffle either by concatenating it with UNDEF or
+ // extracting a suitable range of elements.
+ for (auto &Src : Sources) {
+ EVT SrcVT = Src.ShuffleVec.getValueType();
+
+ if (SrcVT.getSizeInBits() == VT.getSizeInBits())
+ continue;
+
+ // This stage of the search produces a source with the same element type as
+ // the original, but with a total width matching the BUILD_VECTOR output.
+ EVT EltVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
+ EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- // This loop extracts the usage patterns of the source vectors
- // and prepares appropriate SDValues for a shuffle if possible.
- for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
- // No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
- VEXTOffsets[i] = 0;
+ if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
+ if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
+ return SDValue();
+ // We can pad out the smaller vector for free, so if it's part of a
+ // shuffle...
+ Src.ShuffleVec =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
+ DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
- // It probably isn't worth padding out a smaller vector just to
- // break it down again in a shuffle.
- return SDValue();
}
- // Since only 64-bit and 128-bit vectors are legal on ARM and
- // we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
- "unexpected vector sizes in ReconstructShuffle");
+ if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
+ return SDValue();
- if (MaxElts[i] - MinElts[i] >= NumElts) {
+ if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
// Span too large for a VEXT to cope
return SDValue();
}
- if (MinElts[i] >= NumElts) {
+ if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
- VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(NumElts, dl));
- } else if (MaxElts[i] < NumElts) {
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+ Src.WindowBase = -NumSrcElts;
+ } else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
- VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(0, dl));
+ Src.ShuffleVec =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
} else {
// An actual VEXT is needed
- VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(0, dl));
- SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i],
- DAG.getIntPtrConstant(NumElts, dl));
- ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(VEXTOffsets[i], dl,
- MVT::i32));
- }
- }
-
- SmallVector<int, 8> Mask;
-
- for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue VEXTSrc1 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue VEXTSrc2 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+ DAG.getConstant(NumSrcElts, dl, MVT::i32));
+
+ Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
+ VEXTSrc2,
+ DAG.getConstant(Src.MinElt, dl, MVT::i32));
+ Src.WindowBase = -Src.MinElt;
+ }
+ }
+
+ // Another possible incompatibility occurs from the vector element types. We
+ // can fix this by bitcasting the source vectors to the same type we intend
+ // for the shuffle.
+ for (auto &Src : Sources) {
+ EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
+ if (SrcEltTy == SmallestEltTy)
+ continue;
+ assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
+ Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
+ Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
+ Src.WindowBase *= Src.WindowScale;
+ }
+
+ // Final sanity check before we try to actually produce a shuffle.
+ DEBUG(
+ for (auto Src : Sources)
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);
+ );
+
+  // The stars all align; our next step is to produce the mask for the shuffle.
+ SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
+ int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF) {
- Mask.push_back(-1);
+ if (Entry.getOpcode() == ISD::UNDEF)
continue;
- }
- SDValue ExtractVec = Entry.getOperand(0);
- int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
- .getOperand(1))->getSExtValue();
- if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
- } else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
- }
+ auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+
+ // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
+ // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
+ // segment.
+ EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
+ int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
+ VT.getVectorElementType().getSizeInBits());
+ int LanesDefined = BitsDefined / BitsPerShuffleLane;
+
+ // This source is expected to fill ResMultiplier lanes of the final shuffle,
+ // starting at the appropriate offset.
+ int *LaneMask = &Mask[i * ResMultiplier];
+
+ int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
+ ExtractBase += NumElts * (Src - Sources.begin());
+ for (int j = 0; j < LanesDefined; ++j)
+ LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
- if (isShuffleMaskLegal(Mask, VT))
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
- &Mask[0]);
+ if (!isShuffleMaskLegal(Mask, ShuffleVT))
+ return SDValue();
- return SDValue();
+ // We can't handle more than two sources. This should have already
+ // been checked before this point.
+ assert(Sources.size() <= 2 && "Too many sources!");
+
+ SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
+ for (unsigned i = 0; i < Sources.size(); ++i)
+ ShuffleOps[i] = Sources[i].ShuffleVec;
+
+ SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
+ ShuffleOps[1], &Mask[0]);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
}
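
A standalone sketch of the lane bookkeeping used above (assumed, illustrative values, not LLVM API code): after a source is bitcast to the common small element type, its original element EltNo begins at shuffle lane EltNo * WindowScale + WindowBase, offset by NumElts if it comes from the second source.

#include <cassert>

// Standalone model of the mask arithmetic in ReconstructShuffle: element
// 'EltNo' of source 'SrcIdx' occupies lanes [ExtractBase, ExtractBase +
// LanesDefined) of the final mask, where ExtractBase accounts for the bitcast
// (WindowScale) and any VEXT/extract_subvector adjustment (WindowBase).
static int extractBase(int EltNo, int WindowScale, int WindowBase,
                       unsigned NumElts, unsigned SrcIdx) {
  return EltNo * WindowScale + WindowBase + (int)(NumElts * SrcIdx);
}

int main() {
  // A v4i16 source bitcast to v8i8 (WindowScale = 2, WindowBase = 0):
  // element 3 of the first source starts at shuffle lane 6.
  assert(extractBase(3, 2, 0, 8, 0) == 6);
  // The same element taken from the second source is offset by NumElts.
  assert(extractBase(3, 2, 0, 8, 1) == 14);
  return 0;
}
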
/// isShuffleMaskLegal - Targets can use this to indicate that they only
@@ -6235,6 +6426,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
static SDValue
LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
// Convert to float
// float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
// float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
@@ -6265,6 +6458,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) {
static SDValue
LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
+
SDValue N2;
// Convert to float.
// float4 yf = vcvt_f32_s32(vmovl_s16(y));
@@ -6337,6 +6532,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
+ // TODO: Should this propagate fast-math-flags?
EVT VT = Op.getValueType();
assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
"unexpected type for custom-lowering ISD::UDIV");
@@ -6445,45 +6641,56 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
auto PtrVT = getPointerTy(DAG.getDataLayout());
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
- StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
-
- // Create stack object for sret.
+ Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
auto &DL = DAG.getDataLayout();
- const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
- const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
- SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL));
ArgListTy Args;
- ArgListEntry Entry;
-
- Entry.Node = SRet;
- Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isSRet = true;
- Args.push_back(Entry);
+ bool ShouldUseSRet = Subtarget->isAPCS_ABI();
+ SDValue SRet;
+ if (ShouldUseSRet) {
+ // Create stack object for sret.
+ const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
+ const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
+
+ ArgListEntry Entry;
+ Entry.Node = SRet;
+ Entry.Ty = RetTy->getPointerTo();
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isSRet = true;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(*DAG.getContext());
+ }
+ ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.isSExt = false;
Entry.isZExt = false;
Args.push_back(Entry);
- const char *LibcallName = (ArgVT == MVT::f64)
- ? "__sincos_stret" : "__sincosf_stret";
+ const char *LibcallName =
+ (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+ RTLIB::Libcall LC =
+ (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
+ CallingConv::ID CC = getLibcallCallingConv(LC);
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
- std::move(Args), 0)
- .setDiscardResult();
-
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setCallee(CC, RetTy, Callee, std::move(Args), 0)
+ .setDiscardResult(ShouldUseSRet);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ if (!ShouldUseSRet)
+ return CallResult.first;
+
SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
MachinePointerInfo(), false, false, false, 0);
@@ -6498,6 +6705,85 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
LoadSin.getValue(0), LoadCos.getValue(0));
}
+SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
+ bool Signed,
+ SDValue &Chain) const {
+ EVT VT = Op.getValueType();
+ assert((VT == MVT::i32 || VT == MVT::i64) &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ const char *Name = nullptr;
+ if (Signed)
+ Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
+ else
+ Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
+
+ SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
+
+ ARMTargetLowering::ArgListTy Args;
+
+ for (auto AI : {1, 0}) {
+ ArgListEntry Arg;
+ Arg.Node = Op.getOperand(AI);
+ Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
+ Args.push_back(Arg);
+ }
+
+ CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
+ ES, std::move(Args), 0);
+
+ return LowerCallTo(CLI).first;
+}
+
+SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ assert(Op.getValueType() == MVT::i32 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
+ DAG.getEntryNode(), Op.getOperand(1));
+
+ return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+}
+
+void ARMTargetLowering::ExpandDIV_Windows(
+ SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const {
+ const auto &DL = DAG.getDataLayout();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ assert(Op.getValueType() == MVT::i64 &&
+ "unexpected type for custom lowering DIV");
+ SDLoc dl(Op);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(1, dl, MVT::i32));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi);
+
+ SDValue DBZCHK =
+ DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
+
+ SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
+
+ SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
+ SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
+ DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
+ Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
+
+ Results.push_back(Lower);
+ Results.push_back(Upper);
+}
+
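
A minimal standalone model of the 64-bit plumbing above (hypothetical values; the real code does this with EXTRACT_ELEMENT/TRUNCATE/SRL nodes): the divisor's halves are OR'd together to feed the divide-by-zero check, and the libcall's i64 result is handed back to the legalizer as two i32 halves.

#include <cassert>
#include <cstdint>

int main() {
  // WIN__DBZCHK only needs to know whether the whole 64-bit divisor is zero,
  // so OR-ing the two 32-bit halves is enough.
  uint64_t Divisor = 0x0000000100000000ULL;
  uint32_t Lo = (uint32_t)Divisor;
  uint32_t Hi = (uint32_t)(Divisor >> 32);
  assert((Lo | Hi) != 0);

  // The i64 value returned by __rt_sdiv64/__rt_udiv64 is returned to the type
  // legalizer as two i32 halves.
  uint64_t Result = 0x0000000700000003ULL; // pretend libcall result
  uint32_t Lower = (uint32_t)Result;
  uint32_t Upper = (uint32_t)(Result >> 32);
  assert(Lower == 3 && Upper == 7);
  return 0;
}
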
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
// Monotonic load/store is legal for all targets
if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
@@ -6513,36 +6799,22 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
SDLoc DL(N);
- SDValue Cycles32, OutChain;
-
- if (Subtarget->hasPerfMon()) {
- // Under Power Management extensions, the cycle-count is:
- // mrc p15, #0, <Rt>, c9, c13, #0
- SDValue Ops[] = { N->getOperand(0), // Chain
- DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
- DAG.getConstant(15, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32),
- DAG.getConstant(9, DL, MVT::i32),
- DAG.getConstant(13, DL, MVT::i32),
- DAG.getConstant(0, DL, MVT::i32)
- };
-
- Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
- DAG.getVTList(MVT::i32, MVT::Other), Ops);
- OutChain = Cycles32.getValue(1);
- } else {
- // Intrinsic is defined to return 0 on unsupported platforms. Technically
- // there are older ARM CPUs that have implementation-specific ways of
- // obtaining this information (FIXME!).
- Cycles32 = DAG.getConstant(0, DL, MVT::i32);
- OutChain = DAG.getEntryNode();
- }
-
+ // Under Power Management extensions, the cycle-count is:
+ // mrc p15, #0, <Rt>, c9, c13, #0
+ SDValue Ops[] = { N->getOperand(0), // Chain
+ DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
+ DAG.getConstant(15, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(9, DL, MVT::i32),
+ DAG.getConstant(13, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32)
+ };
- SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
- Cycles32, DAG.getConstant(0, DL, MVT::i32));
- Results.push_back(Cycles64);
- Results.push_back(OutChain);
+ SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
+ DAG.getConstant(0, DL, MVT::i32)));
+ Results.push_back(Cycles32.getValue(1));
}
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -6576,15 +6848,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
- case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
+ case ISD::SREM: return LowerREM(Op.getNode(), DAG);
+ case ISD::UREM: return LowerREM(Op.getNode(), DAG);
case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
@@ -6622,13 +6896,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ARMISD::WIN__DBZCHK: return SDValue();
}
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
+ SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res;
switch (N->getOpcode()) {
@@ -6644,9 +6919,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = LowerREM(N, DAG);
+ break;
case ISD::READCYCLECOUNTER:
ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
return;
+ case ISD::UDIV:
+ case ISD::SDIV:
+ assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
+ return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+ Results);
}
if (Res.getNode())
Results.push_back(Res);
@@ -6683,12 +6967,12 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
// Grab constant pool and fixed stack memory operands.
MachineMemOperand *CPMMO =
- MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
- MachineMemOperand::MOLoad, 4, 4);
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
MachineMemOperand *FIMMOSt =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore, 4, 4);
+ MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOStore, 4, 4);
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
@@ -6792,7 +7076,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
MachineModuleInfo &MMI = MF->getMMI();
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
- if (!BB->isLandingPad()) continue;
+ if (!BB->isEHPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
// pad.
@@ -6807,7 +7091,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
for (SmallVectorImpl<unsigned>::iterator
CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
CSI != CSE; ++CSI) {
- CallSiteNumToLPad[*CSI].push_back(BB);
+ CallSiteNumToLPad[*CSI].push_back(&*BB);
MaxCSNum = std::max(MaxCSNum, *CSI);
}
break;
@@ -6840,7 +7124,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// Shove the dispatch's address into the return slot in the function context.
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
- DispatchBB->setIsLandingPad();
+ DispatchBB->setIsEHPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
unsigned trap_opcode;
@@ -6864,10 +7148,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// context.
SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
- MachineMemOperand *FIMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad |
- MachineMemOperand::MOVolatile, 4, 4);
+ MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI),
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4);
MachineInstrBuilder MIB;
MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
@@ -6982,9 +7265,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
.addReg(NewVReg2, RegState::Kill)
.addReg(NewVReg3));
- MachineMemOperand *JTMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
@@ -7066,9 +7348,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI));
- MachineMemOperand *JTMMOLd =
- MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
+ MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(
BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
@@ -7109,13 +7390,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
BB->succ_end());
while (!Successors.empty()) {
MachineBasicBlock *SMBB = Successors.pop_back_val();
- if (SMBB->isLandingPad()) {
+ if (SMBB->isEHPad()) {
BB->removeSuccessor(SMBB);
MBBLPads.push_back(SMBB);
}
}
- BB->addSuccessor(DispatchBB);
+ BB->addSuccessor(DispatchBB, BranchProbability::getZero());
+ BB->normalizeSuccProbs();
// Find the invoke call and mark all of the callee-saved registers as
// 'implicit defined' so that they're spilled. This prevents code from
@@ -7157,7 +7439,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
// landing pad now.
for (SmallVectorImpl<MachineBasicBlock*>::iterator
I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
- (*I)->setIsLandingPad(false);
+ (*I)->setIsEHPad(false);
// The instruction is gone now.
MI->eraseFromParent();
@@ -7280,8 +7562,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
// Otherwise, we will generate unrolled scalar copies.
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI->getOperand(0).getReg();
unsigned src = MI->getOperand(1).getReg();
@@ -7574,6 +7855,32 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
}
MachineBasicBlock *
+ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+
+ MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock();
+ MF->push_back(ContBB);
+ ContBB->splice(ContBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ MBB->addSuccessor(ContBB);
+
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249);
+ MBB->addSuccessor(TrapBB);
+
+ BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
+ .addReg(MI->getOperand(0).getReg())
+ .addMBB(TrapBB);
+
+ MI->eraseFromParent();
+ return ContBB;
+}
+
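
Roughly, EmitLowered__dbzchk materialises the shape sketched below (a model under the assumption that the check guards a following division; on the real target the trap is a udf #249 reached via tCBZ):

#include <cstdint>
#include <cstdlib>

// Standalone model of the emitted control flow: compare-and-branch-on-zero
// into an out-of-line trap block, otherwise fall through to the rest of the
// original block (ContBB).
static int64_t checkedDiv(int64_t N, int64_t D) {
  if (D == 0)      // tCBZ <divisor>, TrapBB
    std::abort();  // TrapBB: udf #249 on the real target
  return N / D;    // ContBB: original code continues here
}

int main() { return checkedDiv(42, 7) == 6 ? 0 : 1; }
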
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
@@ -7643,8 +7950,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -7741,6 +8047,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return BB;
+
+ case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
@@ -7759,8 +8068,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
// SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator BBI = BB;
- ++BBI;
+ MachineFunction::iterator BBI = ++BB->getIterator();
MachineFunction *Fn = BB->getParent();
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
@@ -7824,11 +8132,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitStructByval(MI, BB);
case ARM::WIN__CHKSTK:
return EmitLowered__chkstk(MI, BB);
+ case ARM::WIN__DBZCHK:
+ return EmitLowered__dbzchk(MI, BB);
+ }
+}
+
+/// \brief Attaches vregs to MEMCPY that it will use as scratch registers
+/// when it is expanded into LDM/STM. This is done as a post-isel lowering
+/// instead of as a custom inserter because we need the use list from the SDNode.
+static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
+ MachineInstr *MI, const SDNode *Node) {
+ bool isThumb1 = Subtarget->isThumb1Only();
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineInstrBuilder MIB(*MF, MI);
+
+ // If the new dst/src is unused mark it as dead.
+ if (!Node->hasAnyUseOfValue(0)) {
+ MI->getOperand(0).setIsDead(true);
+ }
+ if (!Node->hasAnyUseOfValue(1)) {
+ MI->getOperand(1).setIsDead(true);
+ }
+
+ // The MEMCPY both defines and kills the scratch registers.
+ for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+ unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+ : &ARM::GPRRegClass);
+ MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
}
}
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
+ if (MI->getOpcode() == ARM::MEMCPY) {
+ attachMEMCPYScratchRegs(Subtarget, MI, Node);
+ return;
+ }
+
const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -7898,10 +8241,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// Helper function that checks if N is a null or all ones constant.
static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- if (!C)
- return false;
- return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+ return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
}
// Return true if N is conditionally 0 or all ones.
@@ -8723,12 +9063,88 @@ static SDValue PerformXORCombine(SDNode *N,
return SDValue();
}
-/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
-/// the bits being cleared by the AND are not demanded by the BFI.
+// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
+// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
+// their position in "to" (Rd).
+static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
+ assert(N->getOpcode() == ARMISD::BFI);
+
+ SDValue From = N->getOperand(1);
+ ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+ FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
+
+ // If the Base came from a SHR #C, we can deduce that it is really testing bit
+ // #C in the base of the SHR.
+ if (From->getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(From->getOperand(1))) {
+ APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+ assert(Shift.getLimitedValue() < 32 && "Shift too large!");
+ FromMask <<= Shift.getLimitedValue(31);
+ From = From->getOperand(0);
+ }
+
+ return From;
+}
+
+// If A and B each contain one contiguous set of bits, does A | B == A . B
+// (i.e. do A's bits sit immediately above B's, so that OR-ing them gives one
+// contiguous range)?
+//
+// Neither A nor B may be zero.
+static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
+ unsigned LastActiveBitInA = A.countTrailingZeros();
+ unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+ return LastActiveBitInA - 1 == FirstActiveBitInB;
+}
+
+static SDValue FindBFIToCombineWith(SDNode *N) {
+ // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can combine with,
+ // if one exists.
+ APInt ToMask, FromMask;
+ SDValue From = ParseBFI(N, ToMask, FromMask);
+ SDValue To = N->getOperand(0);
+
+ // Now check for a compatible BFI to merge with. We can pass through BFIs that
+ // aren't compatible, but not if they set the same bit in their destination as
+ // we do (or that of any BFI we're going to combine with).
+ SDValue V = To;
+ APInt CombinedToMask = ToMask;
+ while (V.getOpcode() == ARMISD::BFI) {
+ APInt NewToMask, NewFromMask;
+ SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
+ if (NewFrom != From) {
+ // This BFI has a different base. Keep going.
+ CombinedToMask |= NewToMask;
+ V = V.getOperand(0);
+ continue;
+ }
+
+ // Do the written bits conflict with any we've seen so far?
+ if ((NewToMask & CombinedToMask).getBoolValue())
+ // Conflicting bits - bail out because going further is unsafe.
+ return SDValue();
+
+ // Are the new bits contiguous when combined with the old bits?
+ if (BitsProperlyConcatenate(ToMask, NewToMask) &&
+ BitsProperlyConcatenate(FromMask, NewFromMask))
+ return V;
+ if (BitsProperlyConcatenate(NewToMask, ToMask) &&
+ BitsProperlyConcatenate(NewFromMask, FromMask))
+ return V;
+
+ // We've seen a write to some bits, so track it.
+ CombinedToMask |= NewToMask;
+ // Keep going...
+ V = V.getOperand(0);
+ }
+
+ return SDValue();
+}
+
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::AND) {
+ // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+ // the bits being cleared by the AND are not demanded by the BFI.
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
@@ -8744,6 +9160,38 @@ static SDValue PerformBFICombine(SDNode *N,
return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
+ } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
+ // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes.
+ // Keep track of any consecutive bits set that all come from the same base
+ // value. We can combine these together into a single BFI.
+ SDValue CombineBFI = FindBFIToCombineWith(N);
+ if (CombineBFI == SDValue())
+ return SDValue();
+
+ // We've found a BFI.
+ APInt ToMask1, FromMask1;
+ SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
+
+ APInt ToMask2, FromMask2;
+ SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
+ assert(From1 == From2);
+ (void)From2;
+
+ // First, unlink CombineBFI.
+ DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0));
+ // Then create a new BFI, combining the two together.
+ APInt NewFromMask = FromMask1 | FromMask2;
+ APInt NewToMask = ToMask1 | ToMask2;
+
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (NewFromMask[0] == 0)
+ From1 = DCI.DAG.getNode(
+ ISD::SRL, dl, VT, From1,
+ DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
+ return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1,
+ DCI.DAG.getConstant(~NewToMask, dl, VT));
}
return SDValue();
}
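
The mask bookkeeping in the BFI-of-BFI case can be illustrated with a small standalone sketch (assumed example masks, not taken from the source):

#include <cassert>
#include <cstdint>

int main() {
  // Two BFIs copy adjacent bit ranges of the same source value:
  //   bits 0-3 of From into bits 4-7 of the destination, and
  //   bits 4-7 of From into bits 8-11 of the destination.
  uint32_t FromMask1 = 0x0000000F, ToMask1 = 0x000000F0;
  uint32_t FromMask2 = 0x000000F0, ToMask2 = 0x00000F00;

  // Because the ranges concatenate cleanly, the pair can be replaced by a
  // single BFI using the merged masks.
  uint32_t NewFromMask = FromMask1 | FromMask2; // bits 0-7 of From
  uint32_t NewToMask = ToMask1 | ToMask2;       // bits 4-11 of the destination
  assert(NewFromMask == 0xFFu && NewToMask == 0xFF0u);

  // NewFromMask starts at bit 0 here, so no extra SRL of From is required.
  assert((NewFromMask & 1u) != 0);
  return 0;
}
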
@@ -9521,32 +9969,6 @@ static SDValue PerformSTORECombine(SDNode *N,
return SDValue();
}
-// isConstVecPow2 - Return true if each vector element is a power of 2, all
-// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
-static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
-{
- integerPart cN;
- integerPart c0 = 0;
- for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
- I != E; I++) {
- ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
- if (!C)
- return false;
-
- bool isExact;
- APFloat APF = C->getValueAPF();
- if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
- != APFloat::opOK || !isExact)
- return false;
-
- c0 = (I == 0) ? cN : c0;
- if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
- return false;
- }
- C = c0;
- return true;
-}
-
/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
/// can replace combinations of VMUL and VCVT (floating-point to integer)
/// when the VMUL has a constant operand that is a power of 2.
@@ -9556,30 +9978,25 @@ static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
/// vcvt.s32.f32 d16, d16
/// becomes:
/// vcvt.s32.f32 d16, d16, #3
-static SDValue PerformVCVTCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
- SDValue Op = N->getOperand(0);
+ if (!Subtarget->hasNEON())
+ return SDValue();
- if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
- Op.getOpcode() != ISD::FMUL)
+ SDValue Op = N->getOperand(0);
+ if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
return SDValue();
- uint64_t C;
- SDValue N0 = Op->getOperand(0);
SDValue ConstVec = Op->getOperand(1);
- bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
-
- if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
- !isConstVecPow2(ConstVec, isSigned, C))
+ if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t IntBits = IntTy.getSizeInBits();
unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
- NumLanes > 4) {
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from f32 to i32. We can handle
// smaller integers by generating an extra truncate, but larger ones would
    // be lossy. We also can't handle more than 4 lanes, since these instructions
@@ -9587,16 +10004,22 @@ static SDValue PerformVCVTCombine(SDNode *N,
return SDValue();
}
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
SDLoc dl(N);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
Intrinsic::arm_neon_vcvtfp2fxu;
- SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
- NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
- DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
- N0,
- DAG.getConstant(Log2_64(C), dl, MVT::i32));
+ SDValue FixConv = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+ DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
+ DAG.getConstant(C, dl, MVT::i32));
- if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
return FixConv;
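
The arithmetic behind this fold can be checked in isolation (a sketch; the splat constant 8.0f and the input value are assumptions): multiplying by 2^C before an FP-to-int conversion is equivalent to a fixed-point conversion with C fractional bits, which is exactly what vcvt with an immediate does.

#include <cassert>
#include <cstdint>

int main() {
  const float X = 3.25f;
  const int C = 3;                            // splat constant 8.0f == 2^3
  int32_t ViaMul = (int32_t)(X * 8.0f);       // FP_TO_SINT(FMUL(x, splat 8.0))
  int32_t ViaFixed = (int32_t)(X * (1 << C)); // fixed-point view, i.e. #3
  assert(ViaMul == 26 && ViaMul == ViaFixed);
  return 0;
}
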
@@ -9611,38 +10034,44 @@ static SDValue PerformVCVTCombine(SDNode *N,
/// vdiv.f32 d16, d17, d16
/// becomes:
/// vcvt.f32.s32 d16, d16, #3
-static SDValue PerformVDIVCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
+ if (!Subtarget->hasNEON())
+ return SDValue();
+
SDValue Op = N->getOperand(0);
unsigned OpOpcode = Op.getNode()->getOpcode();
-
- if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ if (!N->getValueType(0).isVector() ||
(OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
return SDValue();
- uint64_t C;
SDValue ConstVec = N->getOperand(1);
- bool isSigned = OpOpcode == ISD::SINT_TO_FP;
-
- if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
- !isConstVecPow2(ConstVec, isSigned, C))
+ if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+ uint32_t FloatBits = FloatTy.getSizeInBits();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
- if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+ uint32_t IntBits = IntTy.getSizeInBits();
+ unsigned NumLanes = Op.getValueType().getVectorNumElements();
+ if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
// These instructions only exist converting from i32 to f32. We can handle
// smaller integers by generating an extra extend, but larger ones would
- // be lossy.
+ // be lossy. We also can't handle more then 4 lanes, since these intructions
+ // only support v2i32/v4i32 types.
return SDValue();
}
+ BitVector UndefElements;
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+ int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+ if (C == -1 || C == 0 || C > 32)
+ return SDValue();
+
SDLoc dl(N);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
SDValue ConvInput = Op.getOperand(0);
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
- if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+ if (IntBits < FloatBits)
ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
ConvInput);
@@ -9652,7 +10081,7 @@ static SDValue PerformVDIVCombine(SDNode *N,
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
- ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
+ ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -9680,7 +10109,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
@@ -9695,12 +10124,16 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
- if (isIntrinsic)
+ if (!isIntrinsic)
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+ if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
Cnt = -Cnt;
- return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+ return true;
+ }
+ return false;
}
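The following standalone sketch restates the immediate ranges the two helpers above accept (illustrative only, mirroring the checks rather than replacing them):

#include <cassert>
#include <cstdint>

static bool validLeftShift(int64_t Cnt, int64_t ElementBits, bool isLong) {
  // Long left shifts admit ElementBits itself; ordinary ones stop one short.
  return Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits;
}

static bool validRightShift(int64_t Cnt, int64_t ElementBits, bool isNarrow) {
  // Right shifts count from 1 up to ElementBits (ElementBits/2 if narrowing).
  return Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits);
}

int main() {
  assert(validLeftShift(16, 16, /*isLong=*/true));     // long shifts admit the full width
  assert(!validLeftShift(16, 16, /*isLong=*/false));   // ordinary shifts stop at width-1
  assert(validRightShift(8, 16, /*isNarrow=*/true));   // narrowing right shift: 1..width/2
  assert(!validRightShift(0, 16, /*isNarrow=*/false)); // right shift by 0 is rejected
  return 0;
}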
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
@@ -9939,89 +10372,123 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
-/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
-static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
- // If the target supports NEON, try to use vmax/vmin instructions for f32
- // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
- // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
- // a NaN; only do the transformation when it matches that behavior.
-
- // For now only do this when using NEON for FP operations; if using VFP, it
- // is not obvious that the benefit outweighs the cost of switching to the
- // NEON pipeline.
- if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
- N->getValueType(0) != MVT::f32)
+static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
+ APInt &KnownOne) {
+ if (Op.getOpcode() == ARMISD::BFI) {
+ // Conservatively, we can recurse down the first operand
+ // and just mask out all affected bits.
+ computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+
+ // The operand to BFI is already a mask suitable for removing the bits it
+ // sets.
+ ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
+ APInt Mask = CI->getAPIntValue();
+ KnownZero &= Mask;
+ KnownOne &= Mask;
+ return;
+ }
+ if (Op.getOpcode() == ARMISD::CMOV) {
+ APInt KZ2(KnownZero.getBitWidth(), 0);
+ APInt KO2(KnownOne.getBitWidth(), 0);
+ computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
+ computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
+
+ KnownZero &= KZ2;
+ KnownOne &= KO2;
+ return;
+ }
+ return DAG.computeKnownBits(Op, KnownZero, KnownOne);
+}
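A small worked example of the BFI rule above, assuming the inverted-mask operand convention noted later in this patch (bits set in the mask operand are the bits of the first operand that survive):

#include <cassert>
#include <cstdint>

int main() {
  // Suppose bits 4..31 of V are known zero, and a BFI inserts into bits 4..7.
  uint32_t KnownZeroOfV = 0xFFFFFFF0;
  uint32_t MaskOperand  = 0xFFFFFF0F;   // zeros mark the inserted bit positions
  uint32_t KnownZeroOfBFI = KnownZeroOfV & MaskOperand;
  // Bits 4..7 are no longer known zero; bits 8..31 still are.
  assert(KnownZeroOfBFI == 0xFFFFFF00);
  return 0;
}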
+
+SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
+ // If we have a CMOV, OR and AND combination such as:
+ // if (x & CN)
+ // y |= CM;
+ //
+ // And:
+ // * CN is a single bit;
+ // * All bits covered by CM are known zero in y
+ //
+ // Then we can convert this into a sequence of BFI instructions. This will
+ // always be a win if CM is a single bit, will always be no worse than the
+ // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
+ // three bits (due to the extra IT instruction).
+
+ SDValue Op0 = CMOV->getOperand(0);
+ SDValue Op1 = CMOV->getOperand(1);
+ auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
+ auto CC = CCNode->getAPIntValue().getLimitedValue();
+ SDValue CmpZ = CMOV->getOperand(4);
+
+ // The compare must be against zero.
+ if (!isNullConstant(CmpZ->getOperand(1)))
+ return SDValue();
+
+ assert(CmpZ->getOpcode() == ARMISD::CMPZ);
+ SDValue And = CmpZ->getOperand(0);
+ if (And->getOpcode() != ISD::AND)
+ return SDValue();
+ ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
+ if (!AndC || !AndC->getAPIntValue().isPowerOf2())
return SDValue();
+ SDValue X = And->getOperand(0);
- SDValue CondLHS = N->getOperand(0);
- SDValue CondRHS = N->getOperand(1);
- SDValue LHS = N->getOperand(2);
- SDValue RHS = N->getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
- unsigned Opcode = 0;
- bool IsReversed;
- if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
- IsReversed = false; // x CC y ? x : y
- } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
- IsReversed = true ; // x CC y ? y : x
+ if (CC == ARMCC::EQ) {
+ // We're performing an "equal to zero" compare. Swap the operands so we
+ // canonicalize on a "not equal to zero" compare.
+ std::swap(Op0, Op1);
} else {
- return SDValue();
+ assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
}
+
+ if (Op1->getOpcode() != ISD::OR)
+ return SDValue();
- bool IsUnordered;
- switch (CC) {
- default: break;
- case ISD::SETOLT:
- case ISD::SETOLE:
- case ISD::SETLT:
- case ISD::SETLE:
- case ISD::SETULT:
- case ISD::SETULE:
- // If LHS is NaN, an ordered comparison will be false and the result will
- // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
- // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- break;
- // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
- // will return -0, so vmin can only be used for unsafe math or if one of
- // the operands is known to be nonzero.
- if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- break;
- Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
- break;
+ ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
+ if (!OrC)
+ return SDValue();
+ SDValue Y = Op1->getOperand(0);
- case ISD::SETOGT:
- case ISD::SETOGE:
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- // If LHS is NaN, an ordered comparison will be false and the result will
- // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
- // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
- IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
- if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
- break;
- // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
- // will return +0, so vmax can only be used for unsafe math or if one of
- // the operands is known to be nonzero.
- if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !DAG.getTarget().Options.UnsafeFPMath &&
- !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
- break;
- Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
- break;
- }
+ if (Op0 != Y)
+ return SDValue();
+
+ // Now, is it profitable to continue?
+ APInt OrCI = OrC->getAPIntValue();
+ unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
+ if (OrCI.countPopulation() > Heuristic)
+ return SDValue();
- if (!Opcode)
+ // Lastly, can we determine that the bits defined by OrCI
+ // are zero in Y?
+ APInt KnownZero, KnownOne;
+ computeKnownBits(DAG, Y, KnownZero, KnownOne);
+ if ((OrCI & KnownZero) != OrCI)
return SDValue();
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+
+ // OK, we can do the combine.
+ SDValue V = Y;
+ SDLoc dl(X);
+ EVT VT = X.getValueType();
+ unsigned BitInX = AndC->getAPIntValue().logBase2();
+
+ if (BitInX != 0) {
+ // We must shift X first.
+ X = DAG.getNode(ISD::SRL, dl, VT, X,
+ DAG.getConstant(BitInX, dl, VT));
+ }
+
+ for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
+ BitInY < NumActiveBits; ++BitInY) {
+ if (OrCI[BitInY] == 0)
+ continue;
+ APInt Mask(VT.getSizeInBits(), 0);
+ Mask.setBit(BitInY);
+ V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
+ // Confusingly, the operand is an *inverted* mask.
+ DAG.getConstant(~Mask, dl, VT));
+ }
+
+ return V;
}
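A hedged, branch-free model of the pattern this combine rewrites; the backend emits BFI instructions where the sketch below spells the masks out by hand, and the equivalence only holds under the known-zero precondition checked above:

#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t withBranch(uint32_t x, uint32_t y) {
  if (x & 0x8)       // CN: a single bit
    y |= 0x30;       // CM: bits that are known zero in y
  return y;
}

static uint32_t withBitfieldInserts(uint32_t x, uint32_t y) {
  uint32_t bit = (x >> 3) & 1;       // shift the tested bit down to bit 0
  y = (y & ~0x10u) | (bit << 4);     // behaves like bfi y, bit, #4, #1
  y = (y & ~0x20u) | (bit << 5);     // behaves like bfi y, bit, #5, #1
  return y;
}

int main() {
  // y = 0x100 satisfies the precondition: bits 4..5 are zero in y.
  for (uint32_t x : {0u, 8u, 9u, 0xFFu})
    assert(withBranch(x, 0x100) == withBitfieldInserts(x, 0x100));
  return 0;
}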
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
@@ -10042,6 +10509,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
ARMCC::CondCodes CC =
(ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+ // BFI is only available on V6T2+.
+ if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
+ SDValue R = PerformCMOVToBFICombine(N, DAG);
+ if (R)
+ return R;
+ }
+
// Simplify
// mov r1, r0
// cmp r1, x
@@ -10108,8 +10582,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
- case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
+ case ISD::FP_TO_UINT:
+ return PerformVCVTCombine(N, DCI.DAG, Subtarget);
+ case ISD::FDIV:
+ return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
@@ -10117,7 +10593,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
- case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
case ARMISD::VLD2DUP:
@@ -11043,37 +11518,61 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only");
- unsigned Opcode = Op->getOpcode();
- assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
- "Invalid opcode for Div/Rem lowering");
- bool isSigned = (Opcode == ISD::SDIVREM);
- EVT VT = Op->getValueType(0);
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-
+static RTLIB::Libcall getDivRemLibcall(
+ const SDNode *N, MVT::SimpleValueType SVT) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemLibcall");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
RTLIB::Libcall LC;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (SVT) {
default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
}
+ return LC;
+}
- SDValue InChain = DAG.getEntryNode();
-
+static TargetLowering::ArgListTy getDivRemArgList(
+ const SDNode *N, LLVMContext *Context) {
+ assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
+ N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
+ "Unhandled Opcode in getDivRemArgList");
+ bool isSigned = N->getOpcode() == ISD::SDIVREM ||
+ N->getOpcode() == ISD::SREM;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op->getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op->getOperand(i);
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*Context);
+ Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Args.push_back(Entry);
}
+ return Args;
+}
+
+SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) &&
+ "Register-based DivRem lowering only");
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
+ VT.getSimpleVT().SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
+ DAG.getContext());
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
@@ -11090,6 +11589,47 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
return CallInfo.first;
}
+// Lowers REM using divmod helpers
+// see RTABI section 4.2/4.3
+SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+ // Build return types (div and rem)
+ std::vector<Type*> RetTyParams;
+ Type *RetTyElement;
+
+ switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
+ case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
+ case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
+ case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
+ }
+
+ RetTyParams.push_back(RetTyElement);
+ RetTyParams.push_back(RetTyElement);
+ ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
+ Type *RetTy = StructType::get(*DAG.getContext(), ret);
+
+ RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
+ SimpleTy);
+ SDValue InChain = DAG.getEntryNode();
+ TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+ bool isSigned = N->getOpcode() == ISD::SREM;
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
+ // Lower call
+ CallLoweringInfo CLI(DAG);
+ CLI.setChain(InChain)
+ .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+ // Return second (rem) result operand (first contains div)
+ SDNode *ResNode = CallResult.first.getNode();
+ assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
+ return ResNode->getOperand(1);
+}
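A sketch of the divmod contract both LowerDivRem and LowerREM rely on: one helper call yields the quotient and remainder as a pair, and LowerREM keeps only the second element. The struct name and reference implementation below are illustrative, not the real RTABI routine:

#include <cassert>

struct IDivModResult {
  int quot; // returned first (r0) by the real __aeabi_idivmod helper
  int rem;  // returned second (r1)
};

static IDivModResult aeabi_idivmod_model(int numerator, int denominator) {
  return {numerator / denominator, numerator % denominator};
}

int main() {
  IDivModResult r = aeabi_idivmod_model(17, 5);
  assert(r.quot == 3 && r.rem == 2);
  // LowerREM keeps only the second member; LowerDivRem uses both.
  return 0;
}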
+
SDValue
ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "unsupported target platform");
@@ -11124,8 +11664,8 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -11137,8 +11677,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
+ return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+ SDLoc(Op)).first;
}
bool
@@ -11186,7 +11726,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
- uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8;
+ uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
@@ -11212,7 +11752,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
- NumElts += DL.getTypeAllocSize(ArgTy) / 8;
+ NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
@@ -11295,8 +11835,6 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
-bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; }
-
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11392,19 +11930,26 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
// guarantee, see DDI0406C ARM architecture reference manual,
// sections A8.8.72-74 LDRD)
-bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return (Size == 64) && !Subtarget->isMClass();
+ return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
}
// For the real atomic operations, we have ldrex/strex up to 32 bits,
// and up to 64 bits on the non-M profiles
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
return (Size <= (Subtarget->isMClass() ? 32U : 64U))
- ? AtomicRMWExpansionKind::LLSC
- : AtomicRMWExpansionKind::None;
+ ? AtomicExpansionKind::LLSC
+ : AtomicExpansionKind::None;
+}
+
+bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *AI) const {
+ return true;
}
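Returning true here hands cmpxchg to the generic AtomicExpand pass, which rebuilds it from emitLoadLinked/emitStoreConditional below and calls emitAtomicCmpXchgNoStoreLLBalance on the path where no store was attempted. A conceptual sketch of that shape, not the emitted code:

// A strong compare-exchange like the one below is rewritten into a
// ldrex/strex retry loop, with a clrex on the no-store path on v7+:
//
//   loop:    ldrex   r3, [addr]
//            cmp     r3, expected
//            bne     nostore        ; failure: no store attempted
//            strex   r2, desired, [addr]
//            cmp     r2, #0
//            bne     loop           ; reservation lost, retry
//            ...
//   nostore: clrex                  ; balance the outstanding exclusive access
#include <atomic>
#include <cassert>

int main() {
  std::atomic<int> v{41};
  int expected = 41;
  bool ok = v.compare_exchange_strong(expected, 42);
  assert(ok && v.load() == 42);
  return 0;
}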
// This has so far only been implemented for MachO.
@@ -11419,7 +11964,7 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
return false;
// Floating point values and vector values map to the same register file.
- // Therefore, althought we could do a store extract of a vector type, this is
+ // Therefore, although we could do a store extract of a vector type, this is
// better to leave at float as we have more freedom in the addressing mode for
// those.
if (VectorTy->isFPOrFPVectorTy())
@@ -11441,6 +11986,14 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
return false;
}
+bool ARMTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
+bool ARMTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget->hasV6T2Ops();
+}
+
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -11477,6 +12030,14 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
cast<PointerType>(Addr->getType())->getElementType());
}
+void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+ IRBuilder<> &Builder) const {
+ if (!Subtarget->hasV7Ops())
+ return;
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+}
+
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr,
AtomicOrdering Ord) const {
@@ -11534,12 +12095,12 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
- unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't
- // support i64/f64 element).
- if ((VecSize != 64 && VecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON and skip illegal vector types and vector types
+ // with i64/f64 elements (vldN doesn't support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
return false;
// A pointer vector can not be the return type of the ldN intrinsics. Need to
@@ -11552,9 +12113,6 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy);
-
IRBuilder<> Builder(LI);
SmallVector<Value *, 2> Ops;
@@ -11562,6 +12120,9 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ Type *Tys[] = { VecTy, Int8Ptr };
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
// Replace uses of each shufflevector with the corresponding vector loaded
@@ -11624,12 +12185,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64;
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip illegal sub vector types and vector types of i64/f64 element (vstN
- // doesn't support i64/f64 element).
- if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits)
+ // Skip if we do not have NEON and skip illegal vector types and vector types
+ // with i64/f64 elements (vstN doesn't support i64/f64 elements).
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
+ EltIs64Bits)
return false;
Value *Op0 = SVI->getOperand(0);
@@ -11650,17 +12212,18 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, NumSubElts);
}
- static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
- Intrinsic::arm_neon_vst3,
- Intrinsic::arm_neon_vst4};
- Function *VstNFunc = Intrinsic::getDeclaration(
- SI->getModule(), StoreInts[Factor - 2], SubVecTy);
-
+ static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
+ Intrinsic::arm_neon_vst3,
+ Intrinsic::arm_neon_vst4};
SmallVector<Value *, 6> Ops;
Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+ Type *Tys[] = { Int8Ptr, SubVecTy };
+ Function *VstNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), StoreInts[Factor - 2], Tys);
+
// Split the shufflevector operands into sub vectors for the new vstN call.
for (unsigned i = 0; i < Factor; i++)
Ops.push_back(Builder.CreateShuffleVector(
@@ -11681,14 +12244,14 @@ enum HABaseType {
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
uint64_t &Members) {
- if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (auto *ST = dyn_cast<StructType>(Ty)) {
for (unsigned i = 0; i < ST->getNumElements(); ++i) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
return false;
Members += SubMembers;
}
- } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
uint64_t SubMembers = 0;
if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
return false;
@@ -11703,7 +12266,7 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
return false;
Members = 1;
Base = HA_DOUBLE;
- } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
Members = 1;
switch (Base) {
case HA_FLOAT:
@@ -11747,3 +12310,17 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
return IsHA || IsIntArray;
}
+
+unsigned ARMTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0;
+}
+
+unsigned ARMTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ // Platforms which do not use SjLj EH may return values in these registers
+ // via the personality function.
+ return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index efc9020..b764624 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -63,8 +63,6 @@ namespace llvm {
BCC_i64,
- RBIT, // ARM bitreverse instruction
-
SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
@@ -79,6 +77,7 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
TC_RETURN, // Tail call return pseudo.
@@ -91,6 +90,7 @@ namespace llvm {
PRELOAD, // Preload
WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+ WIN__DBZCHK, // Windows' divide by zero check
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
@@ -172,12 +172,6 @@ namespace llvm {
// BUILD_VECTOR for this purpose.
BUILD_VECTOR,
- // Floating-point max and min:
- FMAX,
- FMIN,
- VMAXNM,
- VMINNM,
-
// Bit-field insert
BFI,
@@ -189,6 +183,10 @@ namespace llvm {
// Vector bitwise select
VBSL,
+ // Pseudo-instruction representing a memory copy using ldm/stm
+ // instructions.
+ MEMCPY,
+
// Vector load N-element structure to all lanes:
VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
VLD3DUP,
@@ -260,6 +258,7 @@ namespace llvm {
SDNode *Node) const override;
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
+ SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override;
@@ -348,6 +347,8 @@ namespace llvm {
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
+ else if (ConstraintCode == "o")
+ return InlineAsm::Constraint_o;
else if (ConstraintCode.size() == 2) {
if (ConstraintCode[0] == 'U') {
switch(ConstraintCode[1]) {
@@ -420,13 +421,24 @@ namespace llvm {
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
- bool hasLoadLinkedStoreConditional() const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
+ void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
bool IsStore, bool IsLoad) const override;
Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
@@ -441,16 +453,21 @@ namespace llvm {
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- TargetLoweringBase::AtomicRMWExpansionKind
+ TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+ bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -496,6 +513,7 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -508,7 +526,6 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
- SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -526,6 +543,12 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SmallVectorImpl<SDValue> &Results) const;
+ SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
+ SDValue &Chain) const;
+ SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -635,6 +658,8 @@ namespace llvm {
MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 84f95be..cf973d6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -51,7 +51,8 @@ void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
- default: break;
+ default:
+ break;
case ARM::LDR_PRE_IMM:
case ARM::LDR_PRE_REG:
case ARM::LDR_POST_IMM:
@@ -124,82 +125,10 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
.addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(), Flag, 4, 4);
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
MIB.addMemOperand(MMO);
MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
AddDefaultPred(MIB);
}
-
-namespace {
- /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
- /// global base register for ARM ELF.
- struct ARMCGBR : public MachineFunctionPass {
- static char ID;
- ARMCGBR() : MachineFunctionPass(ID) {}
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (AFI->getGlobalBaseReg() == 0)
- return false;
- const ARMSubtarget &STI =
- static_cast<const ARMSubtarget &>(MF.getSubtarget());
- // Don't do this for Thumb1.
- if (STI.isThumb1Only())
- return false;
-
- const TargetMachine &TM = MF.getTarget();
- if (TM.getRelocationModel() != Reloc::PIC_)
- return false;
-
- LLVMContext *Context = &MF.getFunction()->getContext();
- unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
- unsigned PCAdj = STI.isThumb() ? 4 : 8;
- ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
- *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
-
- unsigned Align = TM.getDataLayout()->getPrefTypeAlignment(
- Type::getInt32PtrTy(*Context));
- unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
-
- MachineBasicBlock &FirstMBB = MF.front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
- unsigned TempReg =
- MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
- unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp;
- const TargetInstrInfo &TII = *STI.getInstrInfo();
- MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
- TII.get(Opc), TempReg)
- .addConstantPoolIndex(Idx);
- if (Opc == ARM::LDRcp)
- MIB.addImm(0);
- AddDefaultPred(MIB);
-
- // Fix the GOT address by adding pc.
- unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
- Opc = STI.isThumb2() ? ARM::tPICADD : ARM::PICADD;
- MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
- .addReg(TempReg)
- .addImm(ARMPCLabelIndex);
- if (Opc == ARM::PICADD)
- AddDefaultPred(MIB);
-
- return true;
- }
-
- const char *getPassName() const override {
- return "ARM PIC Global Base Reg Initialization";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-char ARMCGBR::ID = 0;
-FunctionPass*
-llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index 9f5bde3..b9de83b 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -59,6 +59,7 @@ def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -70,8 +71,11 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
-def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
@@ -163,21 +167,23 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp,
[SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
+ SDT_ARMEH_SJLJ_SetupDispatch,
+ [SDNPHasChain, SDNPSideEffect]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain, SDNPSideEffect]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
-def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
-
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
-def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
-def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+ SDNPMayStore, SDNPMayLoad]>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
@@ -209,6 +215,8 @@ def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
+ AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -228,7 +236,9 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
- AssemblerPredicate<"FeatureFP16","half-float">;
+ AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
+ AssemblerPredicate<"FeatureFullFP16","full half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -236,9 +246,8 @@ def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate<"FeatureT2XtPk",
"pack/extract">;
-def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
- AssemblerPredicate<"FeatureDSPThumb2",
- "thumb2-dsp">;
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
+ AssemblerPredicate<"FeatureDSP", "dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
AssemblerPredicate<"FeatureDB",
"data-barriers">;
@@ -2322,6 +2331,7 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
}
+def : MnemonicAlias<"smi", "smc">;
// Supervisor Call (Software Interrupt)
let isCall = 1, Uses = [SP] in {
@@ -3671,10 +3681,10 @@ def USAT16 : AI<(outs GPRnopc:$Rd),
let Inst{3-0} = Rn;
}
-def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
- (SSAT imm:$pos, GPRnopc:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
- (USAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
+ (SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
+ (USAT imm0_31:$pos, GPRnopc:$a, 0)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -4186,7 +4196,7 @@ def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rbit", "\t$Rd, $Rm",
- [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ [(set GPR:$Rd, (bitreverse GPR:$Rm))]>,
Requires<[IsARM, HasV6T2]>,
Sched<[WriteALU]>;
@@ -4578,6 +4588,19 @@ let usesCustomInserter = 1 in {
[(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>;
}
+let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in {
+ // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch regs...
+ // Copies N registers worth of memory from address %src to address %dst
+ // and returns the incremented addresses. N scratch registers will
+ // be attached for the copy to use.
+ def MEMCPY : PseudoInst<
+ (outs GPR:$newdst, GPR:$newsrc),
+ (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops),
+ NoItinerary,
+ [(set GPR:$newdst, GPR:$newsrc,
+ (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>;
+}
+
def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;
@@ -4705,7 +4728,7 @@ def STLEXD : AIstlex<0b01, (outs GPR:$Rd),
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
[(int_arm_clrex)]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6K]> {
let Inst{31-0} = 0b11110101011111111111000000011111;
}
@@ -5242,6 +5265,12 @@ def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
+ [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+let usesCustomInserter = 1, Defs = [CPSR] in
+ def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary,
+ [(win__dbzchk GPR:$divisor)]>;
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -5301,6 +5330,10 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
Requires<[IsARM]>;
}
+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in
+def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary,
+ [(ARMeh_sjlj_setup_dispatch)]>;
+
// eh.sjlj.dispatchsetup pseudo-instruction.
// This pseudo is used for both ARM and Thumb. Any differences are handled when
// the pseudo is expanded (which happens before any passes that need the
@@ -5622,16 +5655,16 @@ def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
(MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
- (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+ (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
- (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+ (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
- (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+ (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
- (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+ (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, mod_imm_neg" -> sub
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index f035d61..7020ffb 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -587,11 +587,6 @@ def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
-def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>]>;
-def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
-def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
-
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
unsigned EltBits = 0;
@@ -2465,17 +2460,17 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Same as above, but not predicated.
-class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
itin, OpcodeStr, Dt,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
-class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
- : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+ : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -3255,6 +3250,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
[(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
+ def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
// 128-bit vector types.
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
@@ -3275,6 +3277,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
[(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
let Inst{10} = 1; // overwrite F = 1
}
+ def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+ opc, "f16", asm, "",
+ [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+ Requires<[HasNEON,HasFullFP16]> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
}
@@ -4110,6 +4119,12 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
+def VADDhd : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
+ v4f16, v4f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VADDhq : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
+ v8f16, v8f16, fadd, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
// VADDL : Vector Add Long (Q = D + D)
defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
"vaddl", "s", add, sext, 1>;
@@ -4165,10 +4180,21 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
+def VMULhd : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
+ v4f16, v4f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULhq : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
+ v8f16, v8f16, fmul, 1>,
+ Requires<[HasNEON,HasFullFP16]>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
+def VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
+ v4f16, fmul>,
+ Requires<[HasNEON,HasFullFP16]>;
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
@@ -4277,6 +4303,12 @@ def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
+ v8f16, fmul_su, fadd_mlx>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4285,6 +4317,12 @@ def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
v4f32, v2f32, fmul_su, fadd_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
+def VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
+ v8f16, v4f16, fmul, fadd>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -4495,6 +4533,12 @@ def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4503,6 +4547,12 @@ def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
v4f32, v2f32, fmul_su, fsub_mlx>,
Requires<[HasNEON, UseFPVMLx]>;
+def VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
+ v8f16, v4f16, fmul, fsub>,
+ Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
(mul (v8i16 QPR:$src2),
@@ -4570,6 +4620,13 @@ def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
v4f32, fmul_su, fadd_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
+ v4f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
+ v8f16, fmul, fadd>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Fused Vector Multiply Subtract (floating-point)
def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
@@ -4578,6 +4635,12 @@ def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
v4f32, fmul_su, fsub_mlx>,
Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
+ v4f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
+ v8f16, fmul, fsub>,
+ Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4602,6 +4665,12 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
v2f32, v2f32, fsub, 0>;
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
+def VSUBhd : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
+ v4f16, v4f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
+def VSUBhq : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
+ v8f16, v8f16, fsub, 0>,
+ Requires<[HasNEON,HasFullFP16]>;
// VSUBL : Vector Subtract Long (Q = D - D)
defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
"vsubl", "s", sub, sext, 0>;
@@ -4646,6 +4715,12 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+ NEONvceq, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
@@ -4660,6 +4735,12 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+ NEONvcge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
@@ -4677,6 +4758,12 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+ NEONvcgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
@@ -4686,36 +4773,68 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+def VACGEfd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
"f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+def VACGEfq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
"f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
+def VACGEhd : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGEhq : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+def VACGTfd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
"f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+def VACGTfq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
"f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
+def VACGThd : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VACGThq : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
- (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
- (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
- (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
- (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+}
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
- (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+ (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
- (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+ (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
- (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+ (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
- (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+ (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+ (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+ (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+}
// Vector Bitwise Operations.
@@ -5007,6 +5126,12 @@ def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
"vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+def VABDhd : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
+ "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+ "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
@@ -5014,6 +5139,29 @@ defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
+def abd_shr :
+ PatFrag<(ops node:$in1, node:$in2, node:$shift),
+ (NEONvshrs (sub (zext node:$in1),
+ (zext node:$in2)), (i32 $shift))>;
+
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
+ (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
+ (zext (v8i8 DPR:$opB))),
+ (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
+ (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
+ (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
+ (zext (v4i16 DPR:$opB))),
+ (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
+ (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
+ (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
+ (zext (v2i32 DPR:$opB))),
+ (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
+ (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
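
The three VABDLu patterns above recognize the standard branchless absolute-difference idiom: with the widened difference d and s = d >> (bits-1) (arithmetic shift, so 0 or -1), |d| equals (d + s) ^ s, which is exactly the xor/add/shift shape matched against NEONvshrs. A minimal host-side sketch of the identity for the 16-to-32-bit case (the helper name absDiff is illustrative, not from the patch):

    #include <cassert>
    #include <cstdint>

    // |a - b| without branches: d = a - b (operands zero-extended, as in the
    // patterns), s = d >> 31 is 0 or -1, and (d + s) ^ s == |d|.
    // Assumes arithmetic right shift of negative values, as on the target.
    static uint32_t absDiff(uint16_t a, uint16_t b) {
      int32_t d = int32_t(a) - int32_t(b);
      int32_t s = d >> 31;
      return uint32_t((d + s) ^ s);
    }

    int main() {
      assert(absDiff(10, 3) == 7);
      assert(absDiff(3, 10) == 7);
      assert(absDiff(0xFFFF, 0) == 0xFFFF);
      return 0;
    }

The shift amounts 15, 31 and 63 in the patterns are bits-1 of the widened element types (i16, i32, i64), i.e. the same trick applied per lane.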
+
// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "s", int_arm_neon_vabds, add>;
@@ -5031,53 +5179,85 @@ defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
// VMAX : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmax", "s", int_arm_neon_vmaxs, 1>;
+ "vmax", "s", smax, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmax", "u", int_arm_neon_vmaxu, 1>;
+ "vmax", "u", umax, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmax", "f32",
- v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+ v2f32, v2f32, fmaxnan, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmax", "f32",
- v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+ v4f32, v4f32, fmaxnan, 1>;
+def VMAXhd : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmax", "f16",
+ v4f16, v4f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMAXhq : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmax", "f16",
+ v8f16, v8f16, fmaxnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VMAXNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+ def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
+ v2f32, v2f32, fmaxnum, 1>,
Requires<[HasV8, HasNEON]>;
- def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+ def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vmaxnm", "f32",
- v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
+ v4f32, v4f32, fmaxnum, 1>,
Requires<[HasV8, HasNEON]>;
+ def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v4f16, v4f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vmaxnm", "f16",
+ v8f16, v8f16, fmaxnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
// VMIN : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmin", "s", int_arm_neon_vmins, 1>;
+ "vmin", "s", smin, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vmin", "u", int_arm_neon_vminu, 1>;
+ "vmin", "u", umin, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
"vmin", "f32",
- v2f32, v2f32, int_arm_neon_vmins, 1>;
+ v2f32, v2f32, fminnan, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
"vmin", "f32",
- v4f32, v4f32, int_arm_neon_vmins, 1>;
+ v4f32, v4f32, fminnan, 1>;
+def VMINhd : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
+ "vmin", "f16",
+ v4f16, v4f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VMINhq : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+ "vmin", "f16",
+ v8f16, v8f16, fminnan, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VMINNM
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+ def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
- v2f32, v2f32, int_arm_neon_vminnm, 1>,
+ v2f32, v2f32, fminnum, 1>,
Requires<[HasV8, HasNEON]>;
- def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+ def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
N3RegFrm, NoItinerary, "vminnm", "f32",
- v4f32, v4f32, int_arm_neon_vminnm, 1>,
+ v4f32, v4f32, fminnum, 1>,
Requires<[HasV8, HasNEON]>;
+ def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v4f16, v4f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+ N3RegFrm, NoItinerary, "vminnm", "f16",
+ v8f16, v8f16, fminnum, 1>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
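
The interesting part of this hunk is the split in NaN semantics: VMAX/VMIN now map to the generic fmaxnan/fminnan nodes (any NaN operand yields NaN), while VMAXNM/VMINNM map to fmaxnum/fminnum (IEEE-754 maxNum/minNum, which return the numeric operand when exactly one input is NaN). A scalar sketch of the two selection rules (maxNaN and maxNum are our own names; signed-zero ordering is ignored for brevity):

    #include <cassert>
    #include <cmath>
    #include <limits>

    // maxNaN ~ fmaxnan (VMAX.F32): any NaN input produces NaN.
    static float maxNaN(float a, float b) {
      if (std::isnan(a) || std::isnan(b))
        return std::numeric_limits<float>::quiet_NaN();
      return a > b ? a : b;
    }

    // maxNum ~ fmaxnum (VMAXNM.F32, IEEE-754 maxNum): a lone NaN is dropped
    // and the numeric operand is returned.
    static float maxNum(float a, float b) {
      if (std::isnan(a)) return b;
      if (std::isnan(b)) return a;
      return a > b ? a : b;
    }

    int main() {
      const float nan = std::numeric_limits<float>::quiet_NaN();
      assert(std::isnan(maxNaN(1.0f, nan)));
      assert(maxNum(1.0f, nan) == 1.0f);
      assert(maxNum(1.0f, 2.0f) == 2.0f);
      return 0;
    }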
// Vector Pairwise Operations.
@@ -5095,6 +5275,10 @@ def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
IIC_VPBIND, "vpadd", "f32",
v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def VPADDh : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
+ IIC_VPBIND, "vpadd", "f16",
+ v4f16, v4f16, int_arm_neon_vpadd, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -5123,6 +5307,9 @@ def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
"f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXh : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+ "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// VPMIN : Vector Pairwise Minimum
def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
@@ -5139,6 +5326,9 @@ def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
"f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINh : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+ "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
+ Requires<[HasNEON, HasFullFP16]>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -5155,6 +5345,14 @@ def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
IIC_VUNAQ, "vrecpe", "f32",
v4f32, v4f32, int_arm_neon_vrecpe>;
+def VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAD, "vrecpe", "f16",
+ v4f16, v4f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+ IIC_VUNAQ, "vrecpe", "f16",
+ v8f16, v8f16, int_arm_neon_vrecpe>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRECPS : Vector Reciprocal Step
def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
@@ -5163,6 +5361,14 @@ def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrecps", "f32",
v4f32, v4f32, int_arm_neon_vrecps, 1>;
+def VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrecps", "f16",
+ v4f16, v4f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrecps", "f16",
+ v8f16, v8f16, int_arm_neon_vrecps, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRSQRTE : Vector Reciprocal Square Root Estimate
def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
@@ -5177,6 +5383,14 @@ def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
IIC_VUNAQ, "vrsqrte", "f32",
v4f32, v4f32, int_arm_neon_vrsqrte>;
+def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAD, "vrsqrte", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+ IIC_VUNAQ, "vrsqrte", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrte>,
+ Requires<[HasNEON, HasFullFP16]>;
// VRSQRTS : Vector Reciprocal Square Root Step
def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
@@ -5185,6 +5399,14 @@ def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrsqrts", "f32",
v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSD, "vrsqrts", "f16",
+ v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+ IIC_VRECSQ, "vrsqrts", "f16",
+ v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
+ Requires<[HasNEON, HasFullFP16]>;
// Vector Shifts.
@@ -5336,6 +5558,14 @@ def VABSfd : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v4f32, v4f32, fabs>;
+def VABShd : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v4f16, v4f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VABShq : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+ "vabs", "f16",
+ v8f16, v8f16, fabs>,
+ Requires<[HasNEON, HasFullFP16]>;
def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
(v2i32 (bitconvert (v8i8 (add DPR:$src,
@@ -5398,6 +5628,16 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
(outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
"vneg", "f32", "$Vd, $Vm", "",
[(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+def VNEGhd : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
+ (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VNEGhq : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
+ (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+ "vneg", "f16", "$Vd, $Vm", "",
+ [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
+ Requires<[HasNEON, HasFullFP16]>;
def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -5868,18 +6108,56 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
+def VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v4i16, v4f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v4i16, v4f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v4f16, v4i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v4f16, v4i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
+def VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+ v8i16, v8f16, fp_to_sint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+ v8i16, v8f16, fp_to_uint>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+ v8f16, v8i16, sint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+def VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+ v8f16, v8i16, uint_to_fp>,
+ Requires<[HasNEON, HasFullFP16]>;
+
// VCVT{A, N, P, M}
multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
SDPatternOperator IntU> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
- def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
"s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
- def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
- def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
"u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+ def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v4i16, v4f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+ "s16.f16", v8i16, v8f16, IntS>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v4i16, v4f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
+ def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+ "u16.f16", v8i16, v8f16, IntU>,
+ Requires<[HasV8, HasNEON, HasFullFP16]>;
}
}
@@ -5898,6 +6176,16 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
}
let DecoderMethod = "DecodeVCVTQ" in {
@@ -5909,6 +6197,16 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+ v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+ v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+ v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
}
def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
@@ -5929,6 +6227,24 @@ def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
(VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
+ (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
+ (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
+ (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
+ (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
+ (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
+ (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+ (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+ (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -6182,22 +6498,40 @@ def VTBX4Pseudo
// VRINT : Vector Rounding
multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
- def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
+ def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
- def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
+ def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
!strconcat("vrint", op), "f32",
v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
let Inst{9-7} = op9_7;
}
+ def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v4f16, v4f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
+ def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+ !strconcat("vrint", op), "f16",
+ v8f16, v8f16, Int>,
+ Requires<[HasV8, HasNEON, HasFullFP16]> {
+ let Inst{9-7} = op9_7;
+ }
}
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
- (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+ (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
- (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+ (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+ (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+ def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+ (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+ }
}
defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
@@ -6343,8 +6677,8 @@ def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPat<NEONfmax, VMAXfd>;
-def : N3VSPat<NEONfmin, VMINfd>;
+def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
@@ -7704,6 +8038,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
(VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
(VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
+ (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
(VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7719,6 +8056,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
(VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
(VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
+ (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
@@ -7736,6 +8076,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
(VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
(VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
+ (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
(VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7751,6 +8094,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
(VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
(VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
+ (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
// VSWP allows, but does not require, a type suffix.
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 40414da..df6f243 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -591,6 +591,34 @@ def tTRAP : TI<(outs), (ins), IIC_Br,
// Load Store Instructions.
//
+// PC-relative loads need to be matched first as constant pool accesses need to
+// always be PC-relative. We do this using AddedComplexity, as the pattern is
+// simpler than the patterns of the other load instructions.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in
+def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
+ T1Encoding<{0,1,0,0,1,?}> {
+ // A6.2 & A8.6.59
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
+// SP-relative loads should be matched before standard immediate-offset loads as
+// it means we avoid having to move SP to another register.
+let canFoldAsLoad = 1 in
+def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr",
+ [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
+ T1LdStSP<{1,?,?}> {
+ bits<3> Rt;
+ bits<8> addr;
+ let Inst{10-8} = Rt;
+ let Inst{7-0} = addr;
+}
+
// Loads: reg/reg and reg/imm5
let canFoldAsLoad = 1, isReMaterializable = 1 in
multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -598,16 +626,20 @@ multiclass thumb_ld_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
AddrMode am, InstrItinClass itin_r,
InstrItinClass itin_i, string asm,
PatFrag opnode> {
- def r : // reg/reg
- T1pILdStEncode<reg_opc,
- (outs tGPR:$Rt), (ins AddrMode_r:$addr),
- am, itin_r, asm, "\t$Rt, $addr",
- [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
+ // Immediate-offset loads should be matched before register-offset loads as
+ // when the offset is a constant it's simpler to first check if it fits in the
+ // immediate offset field then fall back to register-offset if it doesn't.
def i : // reg/imm5
T1pILdStEncodeImm<imm_opc, 1 /* Load */,
(outs tGPR:$Rt), (ins AddrMode_i:$addr),
am, itin_i, asm, "\t$Rt, $addr",
[(set tGPR:$Rt, (opnode AddrMode_i:$addr))]>;
+ // Register-offset loads are matched last.
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
}
// Stores: reg/reg and reg/imm5
multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -615,32 +647,32 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
AddrMode am, InstrItinClass itin_r,
InstrItinClass itin_i, string asm,
PatFrag opnode> {
- def r : // reg/reg
- T1pILdStEncode<reg_opc,
- (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
- am, itin_r, asm, "\t$Rt, $addr",
- [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
def i : // reg/imm5
T1pILdStEncodeImm<imm_opc, 0 /* Store */,
(outs), (ins tGPR:$Rt, AddrMode_i:$addr),
am, itin_i, asm, "\t$Rt, $addr",
[(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+ def r : // reg/reg
+ T1pILdStEncode<reg_opc,
+ (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+ am, itin_r, asm, "\t$Rt, $addr",
+ [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
}
// A8.6.57 & A8.6.60
-defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iLoad_r, IIC_iLoad_i, "ldr",
UnOpFrag<(load node:$Src)>>;
// A8.6.64 & A8.6.61
-defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
UnOpFrag<(zextloadi8 node:$Src)>>;
// A8.6.76 & A8.6.73
-defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
UnOpFrag<(zextloadi16 node:$Src)>>;
@@ -659,58 +691,36 @@ def tLDRSH : // A8.6.84
"ldrsh", "\t$Rt, $addr",
[(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;
-let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
- T1LdStSP<{1,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
- T1Encoding<{0,1,0,0,1,?}> {
- // A6.2 & A8.6.59
+def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
+ "str", "\t$Rt, $addr",
+ [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
+ T1LdStSP<{0,?,?}> {
bits<3> Rt;
bits<8> addr;
let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
+ let Inst{7-0} = addr;
}
// A8.6.194 & A8.6.192
-defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
+defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
t_addrmode_is4, AddrModeT1_4,
IIC_iStore_r, IIC_iStore_i, "str",
BinOpFrag<(store node:$LHS, node:$RHS)>>;
// A8.6.197 & A8.6.195
-defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
t_addrmode_is1, AddrModeT1_1,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
// A8.6.207 & A8.6.205
-defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
t_addrmode_is2, AddrModeT1_2,
IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
-def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
- "str", "\t$Rt, $addr",
- [(store tGPR:$Rt, t_addrmode_sp:$addr)]>,
- T1LdStSP<{0,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -730,6 +740,7 @@ def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
// Writeback version is just a pseudo, as there's no encoding difference.
// Writeback happens iff the base register is not in the destination register
// list.
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def tLDMIA_UPD :
InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
"$Rn = $wb", IIC_iLoad_mu>,
@@ -1328,16 +1339,16 @@ def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
(tSUBrr tGPR:$lhs, tGPR:$rhs)>;
// Bswap 16 with load/store
-def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)),
- (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>;
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
-def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
- t_addrmode_rrs2:$addr),
- (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
+ (tREV16 (tLDRHr t_addrmode_rr:$addr))>;
def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
t_addrmode_is2:$addr),
(tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_rr:$addr),
+ (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>;
// ConstantPool
def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
@@ -1372,10 +1383,10 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
Requires<[IsThumb, HasV5T]>;
// zextload i1 -> zextload i8
-def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
- (tLDRBr t_addrmode_rrs1:$addr)>;
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
(tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rr:$addr),
+ (tLDRBr t_addrmode_rr:$addr)>;
// extload from the stack -> word load from the stack, as it avoids having to
// materialize the base in a separate register. This only works when a word
@@ -1389,61 +1400,61 @@ def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
Requires<[IsThumb, IsThumb1Only, IsLE]>;
// extload -> zextload
-def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi8 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi1 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
// If it's impossible to use [r,r] address mode for sextload, select to
// ldr{b|h} + sxt{b|h} instead.
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
(tSXTB (tLDRBi t_addrmode_is1:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
- (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>,
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tSXTB (tLDRBr t_addrmode_rr:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tSXTH (tLDRHi t_addrmode_is2:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
- (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>,
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tSXTH (tLDRHr t_addrmode_rr:$addr))>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr),
- (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>;
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
(tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>;
-def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr),
- (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi8 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>;
def : T1Pat<(sextloadi16 t_addrmode_is2:$addr),
(tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>;
+def : T1Pat<(sextloadi16 t_addrmode_rr:$addr),
+ (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>;
def : T1Pat<(atomic_load_8 t_addrmode_is1:$src),
(tLDRBi t_addrmode_is1:$src)>;
-def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src),
- (tLDRBr t_addrmode_rrs1:$src)>;
+def : T1Pat<(atomic_load_8 t_addrmode_rr:$src),
+ (tLDRBr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_load_16 t_addrmode_is2:$src),
(tLDRHi t_addrmode_is2:$src)>;
-def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src),
- (tLDRHr t_addrmode_rrs2:$src)>;
+def : T1Pat<(atomic_load_16 t_addrmode_rr:$src),
+ (tLDRHr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_load_32 t_addrmode_is4:$src),
(tLDRi t_addrmode_is4:$src)>;
-def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src),
- (tLDRr t_addrmode_rrs4:$src)>;
+def : T1Pat<(atomic_load_32 t_addrmode_rr:$src),
+ (tLDRr t_addrmode_rr:$src)>;
def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val),
(tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>;
-def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val),
- (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>;
+def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>;
def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val),
(tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>;
-def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val),
- (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>;
+def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>;
def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val),
(tSTRi tGPR:$val, t_addrmode_is4:$ptr)>;
-def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val),
- (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>;
+def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val),
+ (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>;
// Large immediate handling.
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index aba8a7b..d460d33 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -43,7 +43,7 @@ def t2_shift_imm : Operand<i32> {
// Shifted operands. No register controlled shifts for Thumb2.
// Note: We do not support rrx shifted operands yet.
def t2_so_reg : Operand<i32>, // reg imm
- ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
+ ComplexPattern<i32, 2, "SelectShiftImmShifterOperand",
[shl,srl,sra,rotr]> {
let EncoderMethod = "getT2SORegOpValue";
let PrintMethod = "printT2SOOperand";
@@ -1554,19 +1554,21 @@ def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
-// For disassembly only.
+let mayLoad = 1 in
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
+let mayLoad = 1 in
def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
"$addr.base = $wb", []>;
+let mayStore = 1 in
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
@@ -1574,6 +1576,7 @@ def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
+let mayStore = 1 in
def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
t2am_imm8s4_offset:$imm),
@@ -2100,7 +2103,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b010;
let Inst{23} = 0b1;
@@ -2117,7 +2120,7 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
dag iops = (ins rGPR:$Rn, rGPR:$Rm),
string asm = "\t$Rd, $Rn, $Rm">
: T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -2215,13 +2218,13 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
"usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
class T2SatI<dag oops, dag iops, InstrItinClass itin,
@@ -2254,7 +2257,7 @@ def t2SSAT: T2SatI<
def t2SSAT16: T2SatI<
(outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
"ssat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@@ -2278,7 +2281,7 @@ def t2USAT: T2SatI<
def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
"usat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;
@@ -2288,8 +2291,8 @@ def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
let Inst{5-4} = 0b00;
}
-def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
-def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
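
The tightened operands encode the architectural saturation positions: SSAT takes a position of 1-32 and clamps to the signed n-bit range [-2^(n-1), 2^(n-1)-1], while USAT takes 0-31 and clamps to [0, 2^n - 1]. A rough scalar model of both (ssat/usat here are illustrative helpers, not the intrinsics):

    #include <cassert>
    #include <cstdint>

    // SSAT #n (n = 1..32): clamp to the signed n-bit range.
    static int32_t ssat(int64_t x, unsigned n) {
      int64_t hi = (int64_t(1) << (n - 1)) - 1;
      int64_t lo = -(int64_t(1) << (n - 1));
      return int32_t(x < lo ? lo : (x > hi ? hi : x));
    }

    // USAT #n (n = 0..31): clamp to the unsigned range [0, 2^n - 1].
    static uint32_t usat(int64_t x, unsigned n) {
      int64_t hi = (int64_t(1) << n) - 1;
      return uint32_t(x < 0 ? 0 : (x > hi ? hi : x));
    }

    int main() {
      assert(ssat(1000, 8) == 127);
      assert(ssat(-1000, 8) == -128);
      assert(usat(-5, 8) == 0);
      assert(usat(300, 8) == 255);
      return 0;
    }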
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
@@ -2605,7 +2608,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
// Rounding variants of the below included for disassembly only
@@ -2614,7 +2617,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"smmul", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2624,7 +2627,7 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2636,7 +2639,7 @@ def t2SMMLA : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2646,7 +2649,7 @@ def t2SMMLA : T2FourReg<
def t2SMMLAR: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2657,7 +2660,7 @@ def t2SMMLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2667,7 +2670,7 @@ def t2SMMLS: T2FourReg<
def t2SMMLSR:T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2679,7 +2682,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2692,7 +2695,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2705,7 +2708,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2718,7 +2721,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2730,7 +2733,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
[]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2742,7 +2745,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
[]>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2760,7 +2763,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2773,7 +2776,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2786,7 +2789,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2799,7 +2802,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2811,7 +2814,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2823,7 +2826,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[]>,
- Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2839,79 +2842,79 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
def t2SMUAD: T2ThreeReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUADX:T2ThreeReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSD: T2ThreeReg_mac<
0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSDX:T2ThreeReg_mac<
0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]> {
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMLAD : T2FourReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLADX : T2FourReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
"\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
"\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasThumb2DSP]>;
+ Requires<[IsThumb2, HasDSP]>;
//===----------------------------------------------------------------------===//
// Division Instructions.
@@ -2961,7 +2964,7 @@ def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rbit", "\t$Rd, $Rm",
- [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>,
+ [(set rGPR:$Rd, (bitreverse rGPR:$Rm))]>,
Sched<[WriteALU]>;
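
The pattern now uses the target-independent bitreverse node instead of the custom ARMrbit; either way t2RBIT reverses the bit order of a 32-bit value. A reference sketch of the operation (rbit32 is an illustrative name, not an LLVM helper):

    #include <cassert>
    #include <cstdint>

    // Reverse the bit order of a 32-bit value, bit 0 <-> bit 31, etc.
    static uint32_t rbit32(uint32_t x) {
      uint32_t r = 0;
      for (int i = 0; i < 32; ++i)
        r |= ((x >> i) & 1u) << (31 - i);
      return r;
    }

    int main() {
      assert(rbit32(0x1u) == 0x80000000u);
      assert(rbit32(0x80000000u) == 0x1u);
      assert(rbit32(0xF0F0F0F0u) == 0x0F0F0F0Fu);
      return 0;
    }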
def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index e83f8c8..050cd1a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -20,7 +20,6 @@ def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
-
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -93,7 +92,7 @@ def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", "\t$Sd, $addr",
- [(set SPR:$Sd, (load addrmode5:$addr))]> {
+ [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
@@ -107,7 +106,7 @@ def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
IIC_fpStore32, "vstr", "\t$Sd, $addr",
- [(store SPR:$Sd, addrmode5:$addr)]> {
+ [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
let D = VFPNeonDomain;
@@ -393,8 +392,8 @@ multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
}
}
-defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>;
-defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>;
+defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>;
+defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>;
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -541,19 +540,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// FIXME: Verify encoding after integrated assembler is working.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[HasFP16]>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -922,6 +925,22 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
let isRegSequence = 1;
}
+// Hoist an fabs or a fneg of a value coming from integer registers
+// and do the fabs/fneg on the integer value. This is never a loss
+// and could enable the conversion to float to be removed completely.
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsARM]>;
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+ Requires<[IsThumb2]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsARM]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+ (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
+ Requires<[IsThumb2]>;
+
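
These patterns work because an IEEE-754 double split across a GPR pair keeps its sign in bit 31 of the high word, so fabs is a single bit-clear (the BFC) and fneg a single bit-flip (the EOR) before the VMOVDRR. A host-side sketch of the same bit manipulation (the helper names are ours, not from the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Clear bit 31 of the high word: abs of the double built from (lo, hi).
    static double fabsViaBits(uint32_t lo, uint32_t hi) {
      uint64_t bits = (uint64_t(hi & 0x7FFFFFFFu) << 32) | lo;
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }

    // Flip bit 31 of the high word: negation of the same double.
    static double fnegViaBits(uint32_t lo, uint32_t hi) {
      uint64_t bits = (uint64_t(hi ^ 0x80000000u) << 32) | lo;
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }

    int main() {
      // -2.0 has the bit pattern 0xC000000000000000, +2.0 is 0x4000000000000000.
      assert(fabsViaBits(0x00000000u, 0xC0000000u) == 2.0);
      assert(fnegViaBits(0x00000000u, 0x40000000u) == -2.0);
      return 0;
    }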
let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
@@ -1003,7 +1022,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(f64 (sint_to_fp GPR:$a)),
(VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+ def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOD (VLDRS addrmode5:$a))>;
}
@@ -1021,7 +1040,7 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
(VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VSITOS (VLDRS addrmode5:$a))>;
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -1035,7 +1054,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(f64 (uint_to_fp GPR:$a)),
(VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
- def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+ def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOD (VLDRS addrmode5:$a))>;
}
@@ -1053,7 +1072,7 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
(VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
(VUITOS (VLDRS addrmode5:$a))>;
// FP -> Int:
@@ -1106,7 +1125,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
- def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
}
@@ -1124,7 +1143,8 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+ addrmode5:$ptr),
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1138,7 +1158,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
- def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+ def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
}
@@ -1156,7 +1176,8 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
-def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+ addrmode5:$ptr),
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 265b86f..725b838 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,17 +60,24 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
+namespace llvm {
+void initializeARMLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
+
namespace {
/// Post-register-allocation pass that combines load / store instructions to
/// form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
const ARMSubtarget *STI;
const TargetLowering *TL;
ARMFunctionInfo *AFI;
@@ -84,7 +91,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "ARM load / store optimization pass";
+ return ARM_LOAD_STORE_OPT_NAME;
}
private:
@@ -118,6 +125,7 @@ namespace {
};
SpecificBumpPtrAllocator<MergeCandidate> Allocator;
SmallVector<const MergeCandidate*,4> Candidates;
+ SmallVector<MachineInstr*,4> MergeBaseCandidates;
void moveLiveRegsBefore(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator Before);
@@ -140,12 +148,16 @@ namespace {
MachineBasicBlock::iterator &MBBI);
bool MergeBaseUpdateLoadStore(MachineInstr *MI);
bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+ bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+ bool CombineMovBx(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
}
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+
static bool definesCPSR(const MachineInstr *MI) {
for (const auto &MO : MI->operands()) {
if (!MO.isReg())
@@ -619,9 +631,10 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
unsigned NewBase;
if (isi32Load(Opcode)) {
- // If it is a load, then just use one of the destination register to
- // use as the new base.
+ // If it is a load, then just use one of the destination registers
+ // as the new base. Will no longer be writeback in Thumb1.
NewBase = Regs[NumRegs-1].first;
+ Writeback = false;
} else {
// Find a free register that we can use as scratch register.
moveLiveRegsBefore(MBB, InsertBefore);
@@ -725,9 +738,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
MachineInstrBuilder MIB;
if (Writeback) {
- if (Opcode == ARM::tLDMIA)
+ assert(isThumb1 && "expected Writeback only in Thumb1");
+ if (Opcode == ARM::tLDMIA) {
+ assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
+ }
MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
@@ -784,6 +800,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
SmallVector<std::pair<unsigned, bool>, 8> Regs;
SmallVector<unsigned, 4> ImpDefs;
DenseSet<unsigned> KilledRegs;
+ DenseSet<unsigned> UsedRegs;
// Determine list of registers and list of implicit super-register defs.
for (const MachineInstr *MI : Cand.Instrs) {
const MachineOperand &MO = getLoadStoreRegOp(*MI);
@@ -792,6 +809,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
if (IsKill)
KilledRegs.insert(Reg);
Regs.push_back(std::make_pair(Reg, IsKill));
+ UsedRegs.insert(Reg);
if (IsLoad) {
// Collect any implicit defs of super-registers, after merging we can't
@@ -881,7 +899,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
for (MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.isKill())
continue;
- if (KilledRegs.count(MO.getReg()))
+ if (UsedRegs.count(MO.getReg()))
MO.setIsKill(false);
}
}
@@ -995,76 +1013,6 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
} while (SIndex < EIndex);
}
-static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tSUBi8:
- case ARM::t2SUBri:
- case ARM::SUBri:
- CheckCPSRDef = true;
- break;
- case ARM::tSUBspi:
- break;
- }
-
- // Make sure the offset fits in 8 bits.
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
- MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
-static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- unsigned MyPredReg = 0;
- if (!MI)
- return false;
-
- bool CheckCPSRDef = false;
- switch (MI->getOpcode()) {
- default: return false;
- case ARM::tADDi8:
- case ARM::t2ADDri:
- case ARM::ADDri:
- CheckCPSRDef = true;
- break;
- case ARM::tADDspi:
- break;
- }
-
- if (Bytes == 0 || (Limit && Bytes >= Limit))
- // Make sure the offset fits in 8 bits.
- return false;
-
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
- MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
- if (!(MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm() * Scale) == Bytes &&
- getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg))
- return false;
-
- return CheckCPSRDef ? !definesCPSR(MI) : true;
-}
-
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
ARM_AM::AMSubMode Mode) {
switch (Opc) {
@@ -1132,6 +1080,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
}
+/// Check if the given instruction increments or decrements a register and
+/// return the amount it is incremented/decremented by. Returns 0 if the
+/// instruction is not such an increment/decrement, or if the CPSR flags it
+/// generates are possibly read as well.
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ bool CheckCPSRDef;
+ int Scale;
+ switch (MI.getOpcode()) {
+ case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
+ case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
+ case ARM::t2SUBri:
+ case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
+ case ARM::t2ADDri:
+ case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
+ case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
+ case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+ default: return 0;
+ }
+
+ unsigned MIPredReg;
+ if (MI.getOperand(0).getReg() != Reg ||
+ MI.getOperand(1).getReg() != Reg ||
+ getInstrPredicate(&MI, MIPredReg) != Pred ||
+ MIPredReg != PredReg)
+ return 0;
+
+ if (CheckCPSRDef && definesCPSR(&MI))
+ return 0;
+ return MI.getOperand(2).getImm() * Scale;
+}
+
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (MBBI == BeginMBBI)
+ return EndMBBI;
+
+ // Skip debug values.
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+ while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+ --PrevMBBI;
+
+ Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for an increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+ ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+ Offset = 0;
+ MachineBasicBlock &MBB = *MBBI->getParent();
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+ // Skip debug values.
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
+ if (NextMBBI == EndMBBI)
+ return EndMBBI;
+
+ Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+ return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
@@ -1151,7 +1168,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
const MachineOperand &BaseOP = MI->getOperand(0);
unsigned Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
@@ -1163,49 +1179,24 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
if (MI->getOperand(i).getReg() == Base)
return false;
- bool DoMerge = false;
- ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
-
- // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- } else if (Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::da;
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(PrevMBBI);
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- if (DoMerge)
- MBB.erase(NextMBBI);
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
+ if (Mode == ARM_AM::ia && Offset == -Bytes) {
+ Mode = ARM_AM::db;
+ } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
+ Mode = ARM_AM::da;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
+ ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
+ return false;
}
-
- if (!DoMerge)
- return false;
+ MBB.erase(MergeInstr);
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@@ -1283,7 +1274,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
- unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
@@ -1295,7 +1285,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- bool isLd = isLoadSingle(Opcode);
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
@@ -1303,55 +1292,31 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
- bool DoMerge = false;
- ARM_AM::AddrOpc AddSub = ARM_AM::add;
- unsigned NewOpc = 0;
- // AM2 - 12 bits, thumb2 - 8 bits.
- unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
-
- // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
- MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
- if (MBBI != BeginMBBI) {
- MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
- while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
- --PrevMBBI;
- if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (!isAM5 &&
- isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(PrevMBBI);
- }
- }
-
- // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end();
- if (!DoMerge && MBBI != EndMBBI) {
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
- while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
- ++NextMBBI;
- if (!isAM5 &&
- isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
- DoMerge = true;
- AddSub = ARM_AM::sub;
- } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
- DoMerge = true;
- }
- if (DoMerge) {
- NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
- MBB.erase(NextMBBI);
- }
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr
+ = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+ unsigned NewOpc;
+ if (!isAM5 && Offset == Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (Offset == -Bytes) {
+ NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+ } else if (!isAM5 && Offset == -Bytes) {
+ NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+ } else
+ return false;
}
+ MBB.erase(MergeInstr);
- if (!DoMerge)
- return false;
+ ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+ bool isLd = isLoadSingle(Opcode);
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
@@ -1368,18 +1333,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
}
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
@@ -1391,13 +1354,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
- int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+ int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
} else {
- int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
@@ -1409,46 +1371,75 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
return true;
}
-/// Returns true if instruction is a memory operation that this pass is capable
-/// of operating on.
-static bool isMemoryOp(const MachineInstr *MI) {
- // When no memory operands are present, conservatively assume unaligned,
- // volatile, unfoldable.
- if (!MI->hasOneMemOperand())
+bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
+ "Must have t2STRDi8 or t2LDRDi8");
+ if (MI.getOperand(3).getImm() != 0)
return false;
- const MachineMemOperand *MMO = *MI->memoperands_begin();
-
- // Don't touch volatile memory accesses - we may be changing their order.
- if (MMO->isVolatile())
+ // Behaviour for writeback is undefined if base register is the same as one
+ // of the others.
+ const MachineOperand &BaseOp = MI.getOperand(2);
+ unsigned Base = BaseOp.getReg();
+ const MachineOperand &Reg0Op = MI.getOperand(0);
+ const MachineOperand &Reg1Op = MI.getOperand(1);
+ if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
return false;
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
- // not.
- if (MMO->getAlignment() < 4)
- return false;
+ unsigned PredReg;
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
+ MachineBasicBlock::iterator MBBI(MI);
+ MachineBasicBlock &MBB = *MI.getParent();
+ int Offset;
+ MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
+ PredReg, Offset);
+ unsigned NewOpc;
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
+ } else {
+ MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+ if (Offset == 8 || Offset == -8) {
+ NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
+ } else
+ return false;
+ }
+ MBB.erase(MergeInstr);
- // str <undef> could probably be eliminated entirely, but for now we just want
- // to avoid making a mess of it.
- // FIXME: Use str <undef> as a wildcard to enable better stm folding.
- if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
- MI->getOperand(0).isUndef())
- return false;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
+ if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
+ MIB.addOperand(Reg0Op).addOperand(Reg1Op)
+ .addReg(BaseOp.getReg(), RegState::Define);
+ } else {
+ assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
+ MIB.addReg(BaseOp.getReg(), RegState::Define)
+ .addOperand(Reg0Op).addOperand(Reg1Op);
+ }
+ MIB.addReg(BaseOp.getReg(), RegState::Kill)
+ .addImm(Offset).addImm(Pred).addReg(PredReg);
+ assert(TII->get(Opcode).getNumOperands() == 6 &&
+ TII->get(NewOpc).getNumOperands() == 7 &&
+ "Unexpected number of operands in Opcode specification.");
- // Likewise don't mess with references to undefined addresses.
- if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
- MI->getOperand(1).isUndef())
- return false;
+ // Transfer implicit operands.
+ for (const MachineOperand &MO : MI.implicit_operands())
+ MIB.addOperand(MO);
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- unsigned Opcode = MI->getOpcode();
+ MBB.erase(MBBI);
+ return true;
+}
+
+/// Returns true if instruction is a memory operation that this pass is capable
+/// of operating on.
+static bool isMemoryOp(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: break;
case ARM::VLDRS:
case ARM::VSTRS:
- return MI->getOperand(1).isReg();
case ARM::VLDRD:
case ARM::VSTRD:
- return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
case ARM::tLDRi:
@@ -1459,9 +1450,40 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::t2STRi12:
- return MI->getOperand(1).isReg();
+ break;
+ default:
+ return false;
}
- return false;
+ if (!MI.getOperand(1).isReg())
+ return false;
+
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand &MMO = **MI.memoperands_begin();
+
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO.isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO.getAlignment() < 4)
+ return false;
+
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+ return false;
+
+ // Likewise don't mess with references to undefined addresses.
+ if (MI.getOperand(1).isUndef())
+ return false;
+
+ return true;
}
static void InsertLDR_STR(MachineBasicBlock &MBB,
@@ -1616,6 +1638,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned Position = 0;
assert(Candidates.size() == 0);
+ assert(MergeBaseCandidates.size() == 0);
LiveRegsValid = false;
for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
@@ -1626,7 +1649,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
continue;
++Position;
- if (isMemoryOp(MBBI)) {
+ if (isMemoryOp(*MBBI)) {
unsigned Opcode = MBBI->getOpcode();
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
@@ -1694,8 +1717,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MBBI = I;
--Position;
// Fallthrough to look into existing chain.
- } else if (MBBI->isDebugValue())
+ } else if (MBBI->isDebugValue()) {
continue;
+ } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+ MBBI->getOpcode() == ARM::t2STRDi8) {
+ // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
+ // remember them because we may still be able to merge add/sub into them.
+ MergeBaseCandidates.push_back(MBBI);
+ }
+
// If we are here then the chain is broken; Extract candidates for a merge.
if (MemOps.size() > 0) {
@@ -1726,7 +1756,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (Merged) {
Changed = true;
unsigned Opcode = Merged->getOpcode();
- if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
+ if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+ MergeBaseUpdateLSDouble(*Merged);
+ else
MergeBaseUpdateLSMultiple(Merged);
} else {
for (MachineInstr *MI : Candidate->Instrs) {
@@ -1741,6 +1773,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
Candidates.clear();
+ // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+ for (MachineInstr *MI : MergeBaseCandidates)
+ MergeBaseUpdateLSDouble(*MI);
+ MergeBaseCandidates.clear();
return Changed;
}
@@ -1765,7 +1801,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
(MBBI->getOpcode() == ARM::BX_RET ||
MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::MOVPCLR)) {
- MachineInstr *PrevMI = std::prev(MBBI);
+ MachineBasicBlock::iterator PrevI = std::prev(MBBI);
+ // Ignore any DBG_VALUE instructions.
+ while (PrevI->isDebugValue() && PrevI != MBB.begin())
+ --PrevI;
+ MachineInstr *PrevMI = PrevI;
unsigned Opcode = PrevMI->getOpcode();
if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
@@ -1786,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
return false;
}
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+ MBBI->getOpcode() != ARM::tBX_RET)
+ return false;
+
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+ return false;
+
+ for (auto Use : Prev->uses())
+ if (Use.isKill()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill))
+ .copyImplicitOps(&*MBBI);
+ MBB.erase(MBBI);
+ MBB.erase(Prev);
+ return true;
+ }
+
+ llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MF = &Fn;
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -1793,7 +1857,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
- MRI = &Fn.getRegInfo();
+
RegClassInfoValid = false;
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
@@ -1805,18 +1869,29 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Modified |= LoadStoreMultipleOpti(MBB);
if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
+ if (isThumb1)
+ Modified |= CombineMovBx(MBB);
}
Allocator.DestroyAll();
return Modified;
}
+namespace llvm {
+void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
+ "ARM pre- register allocation load / store optimization pass"
+
namespace {
/// Pre- register allocation pass that moves load / stores from consecutive
/// locations closer together to make it more likely they will be combined later.
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {
+ initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry());
+ }
const DataLayout *TD;
const TargetInstrInfo *TII;
@@ -1828,7 +1903,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
const char *getPassName() const override {
- return "ARM pre- register allocation load / store optimization pass";
+ return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
private:
@@ -1847,8 +1922,11 @@ namespace {
char ARMPreAllocLoadStoreOpt::ID = 0;
}
+INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt",
+ ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- TD = Fn.getTarget().getDataLayout();
+ TD = &Fn.getDataLayout();
STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
@@ -1856,9 +1934,8 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
MF = &Fn;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI)
- Modified |= RescheduleLoadStoreInstrs(MFI);
+ for (MachineBasicBlock &MFI : Fn)
+ Modified |= RescheduleLoadStoreInstrs(&MFI);
return Modified;
}
@@ -2187,7 +2264,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!MI->isDebugValue())
MI2LocMap[MI] = ++Loc;
- if (!isMemoryOp(MI))
+ if (!isMemoryOp(*MI))
continue;
unsigned PredReg = 0;
if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
@@ -2275,3 +2352,4 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
return new ARMPreAllocLoadStoreOpt();
return new ARMLoadStoreOpt();
}
+
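The refactoring above replaces the paired isMatchingDecrement/isMatchingIncrement helpers with a single signed isIncrementOrDecrement plus findIncDecBefore/findIncDecAfter, so each merge routine only has to compare one signed byte offset against the transfer size. Below is a minimal standalone sketch of the resulting folding decision for the LDM/STM case, using hypothetical types rather than the real LLVM classes; it only illustrates the control flow introduced in MergeBaseUpdateLSMultiple above.

// Minimal sketch (hypothetical types): a preceding subtract of the transfer
// size turns an "ia"/"ib" multiple into its "db"/"da" updating form, while an
// add/sub of the same size after the instruction keeps the submode and simply
// becomes writeback.  MergeBaseUpdateLSDouble applies the same idea with a
// fixed transfer size of 8 bytes for t2LDRDi8/t2STRDi8.
#include <optional>

enum class SubMode { ia, ib, da, db };

struct Fold {
  SubMode NewMode;   // submode of the updating LDM/STM to emit
  bool EraseBefore;  // true: erase the add/sub before; false: the one after
};

// OffsetBefore/OffsetAfter: signed number of bytes the neighbouring add/sub
// adds to the base register, or 0 if there is no such instruction.
std::optional<Fold> foldBaseUpdate(SubMode Mode, int Bytes,
                                   int OffsetBefore, int OffsetAfter) {
  if (Mode == SubMode::ia && OffsetBefore == -Bytes)
    return Fold{SubMode::db, true};
  if (Mode == SubMode::ib && OffsetBefore == -Bytes)
    return Fold{SubMode::da, true};
  bool Increments = (Mode == SubMode::ia || Mode == SubMode::ib);
  if (Increments && OffsetAfter == Bytes)
    return Fold{Mode, false};
  if (!Increments && OffsetAfter == -Bytes)
    return Fold{Mode, false};
  return std::nullopt;  // nothing adjacent that can be merged
}

For example, an LDMIA transferring 12 bytes followed immediately by an add of 12 to the base yields {ia, EraseBefore = false}, which corresponds to rewriting the sequence as LDMIA_UPD and erasing the add.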
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index f5250ff..ac0330f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===//
+//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,5 +20,4 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
- GlobalBaseReg(0) {}
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 14dd9ef..d644797 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -52,7 +52,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
unsigned ReturnRegsCount;
/// HasStackFrame - True if this function has a stack frame. Set by
- /// processFunctionBeforeCalleeSavedScan().
+ /// determineCalleeSaves().
bool HasStackFrame;
/// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
@@ -110,11 +110,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
- /// GlobalBaseReg - keeps track of the virtual register initialized for
- /// use as the global base register. This is used for PIC in some PIC
- /// relocation models.
- unsigned GlobalBaseReg;
-
/// ArgumentStackSize - amount of bytes on stack consumed by the arguments
/// being passed on the stack
unsigned ArgumentStackSize;
@@ -133,7 +128,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+ VarArgsFrameIndex(0), HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -204,9 +199,6 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
llvm_unreachable("Duplicate entries!");
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 45cc9ea..02cbfb1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -266,12 +266,19 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
}
// Scalar single precision floating point register class.
-// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to
-// avoid partial-write dependencies on D registers (S registers are
-// renamed as portions of D registers).
-def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate
- (sequence "S%u", 0, 31), 2),
- (sequence "S%u", 0, 31))>;
+// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
+// to avoid partial-write dependencies on D or Q (depending on platform)
+// registers (S registers are renamed as portions of D/Q registers).
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> {
+ let AltOrders = [(add (decimate SPR, 2), SPR),
+ (add (decimate SPR, 4),
+ (decimate SPR, 2),
+ (decimate (rotl SPR, 1), 4),
+ (decimate (rotl SPR, 1), 2))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
+}
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
@@ -281,25 +288,29 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
// class.
// ARM requires only word alignment for double. It's more performant if it
// is double-word aligned, though.
-def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(sequence "D%u", 0, 31)> {
- // Allocate non-VFP2 registers D16-D31 first.
- let AltOrders = [(rotl DPR, 16)];
- let AltOrderSelect = [{ return 1; }];
+ // Allocate non-VFP2 registers D16-D31 first, and prefer even registers on
+ // Darwin platforms.
+ let AltOrders = [(rotl DPR, 16),
+ (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))];
+ let AltOrderSelect = [{
+ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+ }];
}
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 16)>;
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
-def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64,
(trunc DPR, 8)>;
// Generic 128-bit vector register class.
-def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
+def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
(sequence "Q%u", 0, 15)> {
// Allocate non-VFP2 aliases Q8-Q15 first.
let AltOrders = [(rotl QPR, 8)];
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
index b03d5ff..3ad7730 100644
--- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td
@@ -37,1050 +37,13 @@ def SW_FDIV : FuncUnit;
// FIXME: Add preload instruction when it is documented.
// FIXME: Model non-pipelined nature of FP div / sqrt unit.
-def SwiftItineraries : ProcessorItineraries<
- [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [
- //
- // Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMOVix2 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2]>,
- InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3]>,
- InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_LS]>],
- [5]>,
- //
- // MVN instructions
- InstrItinData<IIC_iMVNi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iMVNsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- //
- // No operand cycles
- InstrItinData<IIC_iALUx , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>]>,
- //
- // Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iALUsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iALUsir,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iALUsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1]>,
- //
- // Bitwise Instructions that produce a result
- InstrItinData<IIC_iBITi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iBITr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iBITsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iBITsr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1]>,
- //
- // Unary Instructions that produce a result
-
- // CLZ, RBIT, etc.
- InstrItinData<IIC_iUNAr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
-
- // BFC, BFI, UBFX, SBFX
- InstrItinData<IIC_iUNAsi, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1]>,
-
- //
- // Zero and sign extension instructions
- InstrItinData<IIC_iEXTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iEXTAr, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iEXTAsr,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1, 1]>,
- //
- // Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- //
- // Test instructions
- InstrItinData<IIC_iTSTi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iTSTr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iTSTsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iTSTsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<2, [SW_ALU0, SW_ALU1]>],
- [1, 1, 1]>,
- //
- // Move instructions, conditional
- // FIXME: Correctly model the extra input dep on the destination.
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_iCMOVix2, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2]>,
-
- // Integer multiply pipeline
- //
- InstrItinData<IIC_iMUL16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0], 3>,
- InstrStage<1, [SW_ALU0]>],
- [5, 5, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 6, 1, 1]>,
- //
- // Integer divide
- InstrItinData<IIC_iDIV , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0], 0>,
- InstrStage<14, [SW_IDIV]>],
- [14, 1, 1]>,
-
- // Integer load pipeline
- // FIXME: The timings are some rough approximations
- //
- // Immediate offset
- InstrItinData<IIC_iLoad_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iLoad_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3, 4, 1, 1]>,
- //
- // Scaled register offset
- InstrItinData<IIC_iLoad_si , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [5, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [5, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iLoad_iu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iLoad_ru , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0], 1>,
- InstrStage<1, [SW_LS]>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [3, 4, 1, 1]>,
- //
- // Scaled register offset with update
- InstrItinData<IIC_iLoad_siu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 3, 1, 1]>,
- InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [5, 3, 1, 1]>,
- //
- // Load multiple, def is the 5th operand.
- // FIXME: This assumes 3 to 4 registers.
- InstrItinData<IIC_iLoad_m , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1, 3], [], -1>, // dynamic uops
-
- //
- // Load multiple + update, defs are the 1st and 5th operands.
- InstrItinData<IIC_iLoad_mu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1, 3], [], -1>, // dynamic uops
- //
- // Load multiple plus branch
- InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1, 3], [], -1>, // dynamic uops
- //
- // Pop, def is the 3rd operand.
- InstrItinData<IIC_iPop , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 3], [], -1>, // dynamic uops
- //
- // Pop + branch, def is the 3rd operand.
- InstrItinData<IIC_iPop_Br, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 3], [], -1>, // dynamic uops
-
- //
- // iLoadi + iALUr for t2LDRpci_pic.
- InstrItinData<IIC_iLoadiALU, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [4, 1]>,
-
- // Integer store pipeline
- ///
- // Immediate offset
- InstrItinData<IIC_iStore_i , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- InstrItinData<IIC_iStore_d_i, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // Register offset
- InstrItinData<IIC_iStore_r , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_d_r, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Scaled register offset
- InstrItinData<IIC_iStore_si , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Immediate offset with update
- InstrItinData<IIC_iStore_iu , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1]>,
- //
- // Register offset with update
- InstrItinData<IIC_iStore_ru , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 1]>,
- //
- // Scaled register offset with update
- InstrItinData<IIC_iStore_siu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
- [3, 1, 1, 1]>,
- InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 2>,
- InstrStage<1, [SW_LS], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>],
- [3, 1, 1, 1]>,
- //
- // Store multiple
- InstrItinData<IIC_iStore_m , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [], [], -1>, // dynamic uops
- //
- // Store multiple + update
- InstrItinData<IIC_iStore_mu, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS], 1>,
- InstrStage<1, [SW_ALU0, SW_ALU1], 1>,
- InstrStage<1, [SW_LS]>],
- [2], [], -1>, // dynamic uops
-
- //
- // Preload
- InstrItinData<IIC_Preload, [InstrStage<1, [SW_DIS0], 0>], [1, 1]>,
-
- // Branch
- //
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [SW_DIS0], 0>]>,
-
- // FP Special Register to Integer Register File Move
- InstrItinData<IIC_fpSTAT , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [1]>,
- //
- // Single-precision FP Unary
- //
- // Most floating-point moves get issued on ALU0.
- InstrItinData<IIC_fpUNA32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Double-precision FP Unary
- InstrItinData<IIC_fpUNA64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
-
- //
- // Single-precision FP Compare
- InstrItinData<IIC_fpCMP32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [1, 1]>,
- //
- // Double-precision FP Compare
- InstrItinData<IIC_fpCMP64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [1, 1]>,
- //
- // Single to Double FP Convert
- InstrItinData<IIC_fpCVTSD , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Double to Single FP Convert
- InstrItinData<IIC_fpCVTDS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
-
- //
- // Single to Half FP Convert
- InstrItinData<IIC_fpCVTSH , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 4>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1]>,
- //
- // Half to Single FP Convert
- InstrItinData<IIC_fpCVTHS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
-
- //
- // Single-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Double-Precision FP to Integer Convert
- InstrItinData<IIC_fpCVTDI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Integer to Single-Precision FP Convert
- InstrItinData<IIC_fpCVTIS , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Integer to Double-Precision FP Convert
- InstrItinData<IIC_fpCVTID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1]>,
- //
- // Single-precision FP ALU
- InstrItinData<IIC_fpALU32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-precision FP ALU
- InstrItinData<IIC_fpALU64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Single-precision FP Multiply
- InstrItinData<IIC_fpMUL32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-precision FP Multiply
- InstrItinData<IIC_fpMUL64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 1]>,
- //
- // Single-precision FP MAC
- InstrItinData<IIC_fpMAC32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-precision FP MAC
- InstrItinData<IIC_fpMAC64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [12, 1, 1]>,
- //
- // Single-precision Fused FP MAC
- InstrItinData<IIC_fpFMAC32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-precision Fused FP MAC
- InstrItinData<IIC_fpFMAC64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [12, 1, 1]>,
- //
- // Single-precision FP DIV
- InstrItinData<IIC_fpDIV32 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<15, [SW_FDIV]>],
- [17, 1, 1]>,
- //
- // Double-precision FP DIV
- InstrItinData<IIC_fpDIV64 , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<30, [SW_FDIV]>],
- [32, 1, 1]>,
- //
- // Single-precision FP SQRT
- InstrItinData<IIC_fpSQRT32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<15, [SW_FDIV]>],
- [17, 1]>,
- //
- // Double-precision FP SQRT
- InstrItinData<IIC_fpSQRT64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 0>,
- InstrStage<30, [SW_FDIV]>],
- [32, 1, 1]>,
-
- //
- // Integer to Single-precision Move
- InstrItinData<IIC_fpMOVIS, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
- //
- // Integer to Double-precision Move
- InstrItinData<IIC_fpMOVID, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // Single-precision to Integer Move
- InstrItinData<IIC_fpMOVSI, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- //
- // Double-precision to Integer Move
- InstrItinData<IIC_fpMOVDI, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Single-precision FP Load
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // Double-precision FP Load
- InstrItinData<IIC_fpLoad64, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1]>,
- //
- // FP Load Multiple
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpLoad_m, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1, 4], [], -1>, // dynamic uops
- //
- // FP Load Multiple + update
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1, 4], [], -1>, // dynamic uops
- //
- // Single-precision FP Store
- InstrItinData<IIC_fpStore32,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // Double-precision FP Store
- InstrItinData<IIC_fpStore64,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1]>,
- //
- // FP Store Multiple
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpStore_m,[InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [1, 1, 1], [], -1>, // dynamic uops
- //
- // FP Store Multiple + update
- // FIXME: Assumes a single Q register.
- InstrItinData<IIC_fpStore_mu,[InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0, SW_ALU1]>],
- [2, 1, 1, 1], [], -1>, // dynamic uops
- // NEON
- //
- // Double-register Integer Unary
- InstrItinData<IIC_VUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Quad-register Integer Unary
- InstrItinData<IIC_VUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Double-register Integer Q-Unary
- InstrItinData<IIC_VQUNAiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Quad-register Integer CountQ-Unary
- InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1]>,
- //
- // Double-register Integer Binary
- InstrItinData<IIC_VBINiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Binary
- InstrItinData<IIC_VBINiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Subtract
- InstrItinData<IIC_VSUBiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Subtract
- InstrItinData<IIC_VSUBiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Shift
- InstrItinData<IIC_VSHLiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Shift
- InstrItinData<IIC_VSHLiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Shift (4 cycle)
- InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Binary (4 cycle)
- InstrItinData<IIC_VBINi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBi4D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Subtract (4 cycle)
- InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Count
- InstrItinData<IIC_VCNTiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Quad-register Integer Count
- InstrItinData<IIC_VCNTiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1, 1]>,
- //
- // Double-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Absolute Difference and Accumulate
- InstrItinData<IIC_VABAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1, 1]>,
- //
- // Double-register Integer Pair Add Long
- InstrItinData<IIC_VPALiD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Pair Add Long
- InstrItinData<IIC_VPALiQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Multiply (.8, .16)
- InstrItinData<IIC_VMULi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
-
- //
- // Double-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register Integer Multiply (.32)
- InstrItinData<IIC_VMULi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Double-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32D, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Integer Multiply-Accumulate (.8, .16)
- InstrItinData<IIC_VMACi16Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
- //
- // Quad-register Integer Multiply-Accumulate (.32)
- InstrItinData<IIC_VMACi32Q, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1, 1]>,
-
- //
- // Move
- InstrItinData<IIC_VMOV, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Move Immediate
- InstrItinData<IIC_VMOVImm, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2]>,
- //
- // Double-register Permute Move
- InstrItinData<IIC_VMOVD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Quad-register Permute Move
- InstrItinData<IIC_VMOVQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Integer to Single-precision Move
- InstrItinData<IIC_VMOVIS , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
- //
- // Integer to Double-precision Move
- InstrItinData<IIC_VMOVID , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [4, 1, 1]>,
- //
- // Single-precision to Integer Move
- InstrItinData<IIC_VMOVSI , [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_LS]>],
- [3, 1]>,
- //
- // Double-precision to Integer Move
- InstrItinData<IIC_VMOVDI , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 3>,
- InstrStage<1, [SW_LS]>],
- [3, 4, 1]>,
- //
- // Integer to Lane Move
- // FIXME: I think this is correct, but it is not clear from the tuning guide.
- InstrItinData<IIC_VMOVISL , [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_LS], 4>,
- InstrStage<1, [SW_ALU0]>],
- [6, 1]>,
-
- //
- // Vector narrow move
- InstrItinData<IIC_VMOVN, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1]>,
- //
- // Double-register FP Unary
- // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
- // and they issue on a different pipeline.
- InstrItinData<IIC_VUNAD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Quad-register FP Unary
- // FIXME: VRECPE / VRSQRTE have a longer latency than VABS, which is used here,
- // and they issue on a different pipeline.
- InstrItinData<IIC_VUNAQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [2, 1]>,
- //
- // Double-register FP Binary
- // FIXME: We're using this itin for many instructions.
- InstrItinData<IIC_VBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
-
- //
- // VPADD, etc.
- InstrItinData<IIC_VPBIND, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Double-register FP VMUL
- InstrItinData<IIC_VFMULD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Quad-register FP Binary
- InstrItinData<IIC_VBINQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU0]>],
- [4, 1, 1]>,
- //
- // Quad-register FP VMUL
- InstrItinData<IIC_VFMULQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 1]>,
- //
- // Double-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Quad-register FP Multiple-Accumulate
- InstrItinData<IIC_VMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Fused FP Multiple-Accumulate
- InstrItinData<IIC_VFMACD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Quad-register Fused FP Multiple-Accumulate
- InstrItinData<IIC_VFMACQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Reciprocal Step
- InstrItinData<IIC_VRECSD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Quad-register Reciprocal Step
- InstrItinData<IIC_VRECSQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 1]>,
- //
- // Double-register Permute
- // FIXME: The latencies are unclear from the documentation.
- InstrItinData<IIC_VPERMD, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
- //
- // Quad-register Permute
- // FIXME: The latencies are unclear from the documentation.
- InstrItinData<IIC_VPERMQ, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
- //
- // Quad-register Permute (3 cycle issue on A9)
- InstrItinData<IIC_VPERMQ3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [3, 4, 3, 4]>,
-
- //
- // Double-register VEXT
- InstrItinData<IIC_VEXTD, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- //
- // Quad-register VEXT
- InstrItinData<IIC_VEXTQ, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- //
- // VTB
- InstrItinData<IIC_VTB1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VTB2, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 3, 3]>,
- InstrItinData<IIC_VTB3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 3, 5, 5]>,
- InstrItinData<IIC_VTB4, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 3, 5, 7, 7]>,
- //
- // VTBX
- InstrItinData<IIC_VTBX1, [InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1]>],
- [2, 1, 1]>,
- InstrItinData<IIC_VTBX2, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [4, 1, 3, 3]>,
- InstrItinData<IIC_VTBX3, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [6, 1, 3, 5, 5]>,
- InstrItinData<IIC_VTBX4, [InstrStage<1, [SW_DIS0], 0>,
- InstrStage<1, [SW_DIS1], 0>,
- InstrStage<1, [SW_DIS2], 0>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1], 2>,
- InstrStage<1, [SW_ALU1]>],
- [8, 1, 3, 5, 7, 7]>
-]>;
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simple machine model which
-// will replace itineraries.
-
// Swift machine model for scheduling and other instruction cost heuristics.
def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
let MicroOpBufferSize = 45; // Based on NEON renamed registers.
let LoadLatency = 3;
let MispredictPenalty = 14; // A branch direction mispredict.
-
- let Itineraries = SwiftItineraries;
+ let CompleteModel = 0; // FIXME: Remove if all instructions are covered.
}
// Swift predicates.
@@ -1558,6 +521,13 @@ let SchedModel = SwiftModel in {
(instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
"PUSH", "tPUSH")>;
+ // LDRLIT pseudo instructions; they expand to LDR + PICADD
+ def : InstRW<[SwiftWriteP2ThreeCycle, WriteALU],
+ (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
+ // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
+ def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2ThreeCycle],
+ (instregex "LDRLIT_ga_pcrel_ldr")>;
+
// 4.2.26 Branch
def : WriteRes<WriteBr, [SwiftUnitP1]> { let Latency = 0; }
def : WriteRes<WriteBrL, [SwiftUnitP1]> { let Latency = 2; }
diff --git a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 6cafbbb..6fded9c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -160,41 +160,39 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned VTSize = 4;
unsigned i = 0;
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
- const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
+ const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
SDValue TFOps[6];
SDValue Loads[6];
uint64_t SrcOff = 0, DstOff = 0;
- // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
- // same number of stores. The loads and stores will get combined into
- // ldm/stm later on.
- while (EmittedNumMemOps < NumMemOps) {
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, dl, MVT::i32)),
- SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- SrcOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(TFOps, i));
+ // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
+ // VLDM/VSTM and make this code emit it when appropriate. This would reduce
+ // pressure on the general-purpose registers. However, this seems harder to map
+ // onto the register allocator's view of the world.
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, dl, MVT::i32)),
- DstPtrInfo.getWithOffset(DstOff),
- isVolatile, false, 0);
- DstOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- makeArrayRef(TFOps, i));
+ // The number of MEMCPY pseudo-instructions to emit. We use up to
+ // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
+ // later on. This is a lower bound on the number of MEMCPY operations we must
+ // emit.
+ unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
+
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
+
+ for (unsigned I = 0; I != NumMEMCPYs; ++I) {
+ // Evenly distribute registers among MEMCPY operations to reduce register
+ // pressure.
+ unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
+ unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
+
+ Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
+ DAG.getConstant(NumRegs, dl, MVT::i32));
+ Src = Dst.getValue(1);
+ Chain = Dst.getValue(2);
+
+ DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
+ SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
- EmittedNumMemOps += i;
+ EmittedNumMemOps = NextEmittedNumMemOps;
}
if (BytesLeft == 0)
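
The loop above splits the copy into ceil(NumMemOps / MaxLoadsInLDM) MEMCPY pseudo-instructions and spreads the registers evenly across them. A minimal standalone sketch of that arithmetic follows; it is not part of the patch, and splitMemOps and its parameters are illustrative names — only the two formulas are taken from the code above.

#include <cstdio>
#include <vector>

// Standalone sketch: splitMemOps() reproduces the NumMEMCPYs and
// NextEmittedNumMemOps formulas from the patch; all names are illustrative,
// not LLVM API.
static std::vector<unsigned> splitMemOps(unsigned NumMemOps,
                                         unsigned MaxLoadsInLDM) {
  // Lower bound on the number of MEMCPY pseudo-instructions (ceiling divide).
  unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
  std::vector<unsigned> RegsPerOp;
  unsigned Emitted = 0;
  for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    // Evenly distribute the memory ops across the MEMCPY pseudos.
    unsigned NextEmitted = NumMemOps * (I + 1) / NumMEMCPYs;
    RegsPerOp.push_back(NextEmitted - Emitted);
    Emitted = NextEmitted;
  }
  return RegsPerOp;
}

int main() {
  // 10 word-sized ops with at most 6 loads per LDM: the even split gives two
  // MEMCPYs of 5 registers each (5 + 5) instead of a lopsided 6 + 4.
  for (unsigned Regs : splitMemOps(10, 6))
    std::printf("%u ", Regs);
  std::printf("\n");
  return 0;
}
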
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 002c3e9..bb6ae28 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -40,37 +41,9 @@ using namespace llvm;
#include "ARMGenSubtargetInfo.inc"
static cl::opt<bool>
-ReserveR9("arm-reserve-r9", cl::Hidden,
- cl::desc("Reserve R9, making it unavailable as GPR"));
-
-static cl::opt<bool>
-ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
UseFusedMulOps("arm-use-mulops",
cl::init(true), cl::Hidden);
-namespace {
-enum AlignMode {
- DefaultAlign,
- StrictAlign,
- NoStrictAlign
-};
-}
-
-static cl::opt<AlignMode>
-Align(cl::desc("Load/store alignment support"),
- cl::Hidden, cl::init(DefaultAlign),
- cl::values(
- clEnumValN(DefaultAlign, "arm-default-align",
- "Generate unaligned accesses only on hardware/OS "
- "combinations that are known to support them"),
- clEnumValN(StrictAlign, "arm-strict-align",
- "Disallow all unaligned memory accesses"),
- clEnumValN(NoStrictAlign, "arm-no-strict-align",
- "Allow unaligned memory accesses"),
- clEnumValEnd));
-
enum ITMode {
DefaultIT,
RestrictedIT,
@@ -88,6 +61,12 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
"Allow IT blocks based on ARMv7"),
clEnumValEnd));
+/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
+/// currently supported (for testing only).
+static cl::opt<bool>
+ForceFastISel("arm-force-fast-isel",
+ cl::init(false), cl::Hidden);
+
/// initializeSubtargetDependencies - Initializes using a CPU and feature string
/// so that we can use initializer lists for subtarget initialization.
ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -110,8 +89,8 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
- TargetTriple(TT), Options(TM.Options), TM(TM),
+ ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU),
+ IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
@@ -133,6 +112,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV7Ops = false;
HasV8Ops = false;
HasV8_1aOps = false;
+ HasV8_2aOps = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
@@ -147,10 +127,11 @@ void ARMSubtarget::initializeEnvironment() {
UseSoftFloat = false;
HasThumb2 = false;
NoARM = false;
- IsR9Reserved = ReserveR9;
- UseMovt = false;
+ ReserveR9 = false;
+ NoMovt = false;
SupportsTailCall = false;
HasFP16 = false;
+ HasFullFP16 = false;
HasD16 = false;
HasHardwareDivide = false;
HasHardwareDivideInARM = false;
@@ -168,20 +149,36 @@ void ARMSubtarget::initializeEnvironment() {
HasCrypto = false;
HasCRC = false;
HasZeroCycleZeroing = false;
- AllowsUnalignedMem = false;
- Thumb2DSP = false;
+ StrictAlign = false;
+ HasDSP = false;
UseNaClTrap = false;
GenLongCalls = false;
UnsafeFPMath = false;
+
+ // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
+ // directly from it, but we can try to make sure they're consistent when both
+ // are available.
+ UseSjLjEH = isTargetDarwin() && !isTargetWatchOS();
+ assert((!TM.getMCAsmInfo() ||
+ (TM.getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::SjLj) == UseSjLjEH) &&
+ "inconsistent sjlj choice between CodeGen and MC");
}
void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (CPUString.empty()) {
- if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s"))
- // Default to the Swift CPU when targeting armv7s/thumbv7s.
- CPUString = "swift";
- else
- CPUString = "generic";
+ CPUString = "generic";
+
+ if (isTargetDarwin()) {
+ StringRef ArchName = TargetTriple.getArchName();
+ if (ArchName.endswith("v7s"))
+ // Default to the Swift CPU when targeting armv7s/thumbv7s.
+ CPUString = "swift";
+ else if (ArchName.endswith("v7k"))
+ // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
+ // ARMv7k does not use SjLj exception handling.
+ CPUString = "cortex-a7";
+ }
}
// Insert the architecture feature derived from the target triple into the
@@ -212,44 +209,31 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isAAPCS_ABI())
stackAlignment = 8;
- if (isTargetNaCl())
+ if (isTargetNaCl() || isAAPCS16_ABI())
stackAlignment = 16;
- UseMovt = hasV6T2Ops() && ArmUseMOVT;
-
- if (isTargetMachO()) {
- IsR9Reserved = ReserveR9 || !HasV6Ops;
- SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0);
- } else {
- IsR9Reserved = ReserveR9;
- SupportsTailCall = !isThumb1Only();
- }
-
- if (Align == DefaultAlign) {
- // Assume pre-ARMv6 doesn't support unaligned accesses.
- //
- // ARMv6 may or may not support unaligned accesses depending on the
- // SCTLR.U bit, which is architecture-specific. We assume ARMv6
- // Darwin and NetBSD targets support unaligned accesses, and others don't.
- //
- // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit
- // which raises an alignment fault on unaligned accesses. Linux
- // defaults this bit to 0 and handles it as a system-wide (not
- // per-process) setting. It is therefore safe to assume that ARMv7+
- // Linux targets support unaligned accesses. The same goes for NaCl.
- //
- // The above behavior is consistent with GCC.
- AllowsUnalignedMem =
- (hasV7Ops() && (isTargetLinux() || isTargetNaCl() ||
- isTargetNetBSD())) ||
- (hasV6Ops() && (isTargetMachO() || isTargetNetBSD()));
- } else {
- AllowsUnalignedMem = !(Align == StrictAlign);
- }
-
- // No v6M core supports unaligned memory access (v6M ARM ARM A3.2)
- if (isV6M())
- AllowsUnalignedMem = false;
+ // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo::
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instruction,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+
+ SupportsTailCall = !isThumb1Only();
+
+ if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0))
+ SupportsTailCall = false;
switch (IT) {
case DefaultIT:
@@ -276,9 +260,15 @@ bool ARMSubtarget::isAPCS_ABI() const {
}
bool ARMSubtarget::isAAPCS_ABI() const {
assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
- return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS ||
+ TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+}
+bool ARMSubtarget::isAAPCS16_ABI() const {
+ assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN);
+ return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
+
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
bool
ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
@@ -321,11 +311,23 @@ unsigned ARMSubtarget::getMispredictionPenalty() const {
}
bool ARMSubtarget::hasSinCos() const {
- return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0);
+ return isTargetWatchOS() ||
+ (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0));
+}
+
+bool ARMSubtarget::enableMachineScheduler() const {
+ // Enable the MachineScheduler before register allocation for out-of-order
+ // architectures where we do not use the PostRA scheduler anymore (for now
+ // restricted to swift).
+ return getSchedModel().isOutOfOrder() && isSwift();
}
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
+ // No need for PostRA scheduling on out of order CPUs (for now restricted to
+ // swift).
+ if (getSchedModel().isOutOfOrder() && isSwift())
+ return false;
return (!isThumb() || hasThumb2());
}
@@ -333,15 +335,30 @@ bool ARMSubtarget::enableAtomicExpand() const {
return hasAnyDataBarrier() && !isThumb1Only();
}
+bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
+ // For general targets, the prologue can grow when VFPs are allocated with
+ // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
+ // stride 4 (more vpush instructions). But WatchOS uses a compact unwind
+ // format, which is more important to get right.
+}
+
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of
// range otherwise.
- return UseMovt && (isTargetWindows() ||
- !MF.getFunction()->hasFnAttribute(Attribute::MinSize));
+ return !NoMovt && hasV6T2Ops() &&
+ (isTargetWindows() || !MF.getFunction()->optForMinSize());
}
bool ARMSubtarget::useFastISel() const {
+ // Enable fast-isel for any target, for testing only.
+ if (ForceFastISel)
+ return true;
+
+ // Limit fast-isel to the targets that are or have been tested.
+ if (!hasV6Ops())
+ return false;
+
// Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
return TM.Options.EnableFastISel &&
((isTargetMachO() && !isThumb1Only()) ||
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index dd101df..a8b2801 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -43,11 +43,17 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait,
+ CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53,
+ CortexA57, CortexA72, Krait, Swift
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
};
+ enum ARMArchEnum {
+ ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
+ ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
+ ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a
+ };
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
@@ -55,6 +61,9 @@ protected:
/// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
ARMProcClassEnum ARMProcClass;
+ /// ARMArch - ARM architecture
+ ARMArchEnum ARMArch;
+
/// HasV4TOps, HasV5TOps, HasV5TEOps,
/// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops -
/// Specify whether target support specific ARM ISA variants.
@@ -68,6 +77,7 @@ protected:
bool HasV7Ops;
bool HasV8Ops;
bool HasV8_1aOps;
+ bool HasV8_2aOps;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -109,22 +119,24 @@ protected:
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
- /// IsR9Reserved - True if R9 is a not available as general purpose register.
- bool IsR9Reserved;
+ /// ReserveR9 - True if R9 is not available as a general purpose register.
+ bool ReserveR9;
- /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
- /// imms (including global addresses).
- bool UseMovt;
+ /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of
+ /// 32-bit imms (including global addresses).
+ bool NoMovt;
/// SupportsTailCall - True if the OS supports tail call. The dynamic linker
/// must be able to synthesize call stubs for interworking between ARM and
/// Thumb.
bool SupportsTailCall;
- /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
- /// only so far)
+ /// HasFP16 - True if subtarget supports half-precision FP conversions
bool HasFP16;
+ /// HasFullFP16 - True if subtarget supports half-precision FP operations
+ bool HasFullFP16;
+
/// HasD16 - True if subtarget is limited to 16 double precision
/// FP registers for VFPv3.
bool HasD16;
@@ -190,18 +202,18 @@ protected:
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
- /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
+ /// StrictAlign - If true, the subtarget disallows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsMisalignedMemoryAccesses().
- bool AllowsUnalignedMem;
+ bool StrictAlign;
/// RestrictIT - If true, the subtarget disallows generation of deprecated IT
/// blocks to conform to ARMv8 rule.
bool RestrictIT;
- /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
- /// and such) instructions in Thumb2 code.
- bool Thumb2DSP;
+ /// HasDSP - If true, the subtarget supports the DSP (saturating arith
+ /// and such) instructions.
+ bool HasDSP;
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
@@ -212,6 +224,9 @@ protected:
/// Target machine allowed unsafe FP math (such as use of NEON fp)
bool UnsafeFPMath;
+ /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
+ bool UseSjLjEH;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -297,6 +312,7 @@ public:
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
bool hasV8_1aOps() const { return HasV8_1aOps; }
+ bool hasV8_2aOps() const { return HasV8_2aOps; }
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
@@ -343,17 +359,20 @@ public:
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
- bool hasThumb2DSP() const { return Thumb2DSP; }
+ bool hasDSP() const { return HasDSP; }
bool useNaClTrap() const { return UseNaClTrap; }
+ bool useSjLjEH() const { return UseSjLjEH; }
bool genLongCalls() const { return GenLongCalls; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
+ bool hasFullFP16() const { return HasFullFP16; }
const Triple &getTargetTriple() const { return TargetTriple; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
+ bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
@@ -375,6 +394,11 @@ public:
TargetTriple.getEnvironment() == Triple::EABIHF) &&
!isTargetDarwin() && !isTargetWindows();
}
+ bool isTargetGNUAEABI() const {
+ return (TargetTriple.getEnvironment() == Triple::GNUEABI ||
+ TargetTriple.getEnvironment() == Triple::GNUEABIHF) &&
+ !isTargetDarwin() && !isTargetWindows();
+ }
// ARM Targets that support EHABI exception handling standard
// Darwin uses SjLj. Other targets might need more checks.
@@ -383,7 +407,7 @@ public:
TargetTriple.getEnvironment() == Triple::GNUEABI ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
- TargetTriple.getEnvironment() == Triple::Android) &&
+ isTargetAndroid()) &&
!isTargetDarwin() && !isTargetWindows();
}
@@ -391,14 +415,13 @@ public:
// FIXME: this is invalid for WindowsCE
return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
TargetTriple.getEnvironment() == Triple::EABIHF ||
- isTargetWindows();
- }
- bool isTargetAndroid() const {
- return TargetTriple.getEnvironment() == Triple::Android;
+ isTargetWindows() || isAAPCS16_ABI();
}
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
+ bool isAAPCS16_ABI() const;
bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
@@ -409,17 +432,17 @@ public:
bool isRClass() const { return ARMProcClass == RClass; }
bool isAClass() const { return ARMProcClass == AClass; }
- bool isV6M() const {
- return isThumb1Only() && isMClass();
+ bool isR9Reserved() const {
+ return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
- bool isR9Reserved() const { return IsR9Reserved; }
+ bool useStride4VFPs(const MachineFunction &MF) const;
bool useMovt(const MachineFunction &MF) const;
bool supportsTailCall() const { return SupportsTailCall; }
- bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+ bool allowsUnalignedMem() const { return !StrictAlign; }
bool restrictIT() const { return RestrictIT; }
@@ -433,6 +456,9 @@ public:
/// compiler runtime or math libraries.
bool hasSinCos() const;
+ /// Returns true if machine scheduler should be enabled.
+ bool enableMachineScheduler() const override;
+
/// True for some subtargets at > -O0.
bool enablePostRAScheduler() const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 93495d6..fca1901 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
static ARMBaseTargetMachine::ARMABI
computeTargetABI(const Triple &TT, StringRef CPU,
const TargetOptions &Options) {
- if (Options.MCOptions.getABIName().startswith("aapcs"))
+ if (Options.MCOptions.getABIName() == "aapcs16")
+ return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+ else if (Options.MCOptions.getABIName().startswith("aapcs"))
return ARMBaseTargetMachine::ARM_ABI_AAPCS;
else if (Options.MCOptions.getABIName().startswith("apcs"))
return ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
(TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
CPU.startswith("cortex-m")) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ } else if (TT.isWatchOS()) {
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
} else {
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
}
@@ -106,7 +110,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
if (TT.isOSNetBSD())
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
else
- TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+ TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
break;
}
}
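
Between the two hunks above, ABI selection now recognises AAPCS16: an explicit ABI name (Options.MCOptions.getABIName()) wins, with "aapcs16" checked before the "aapcs"/"apcs" prefixes, and otherwise watchOS triples default to AAPCS16. A condensed, standalone sketch of that precedence, with a plain string and a flag standing in for the real TargetOptions and Triple (pickABI is an illustrative name, and its fallback default is a stand-in for the fuller triple switch):

#include <cstdio>
#include <string>

// Illustrative sketch only: the enum mirrors ARMBaseTargetMachine::ARMABI,
// but pickABI, its string handling, and the fallback default are stand-ins.
enum ARMABI { ARM_ABI_UNKNOWN, ARM_ABI_APCS, ARM_ABI_AAPCS, ARM_ABI_AAPCS16 };

static ARMABI pickABI(const std::string &ABIName, bool IsWatchOS) {
  // An explicit ABI name takes precedence over the triple.
  if (ABIName == "aapcs16")
    return ARM_ABI_AAPCS16;
  if (ABIName.compare(0, 5, "aapcs") == 0)
    return ARM_ABI_AAPCS;
  if (ABIName.compare(0, 4, "apcs") == 0)
    return ARM_ABI_APCS;
  // Otherwise derive the ABI from the triple; the new case is watchOS.
  if (IsWatchOS)
    return ARM_ABI_AAPCS16;
  return ARM_ABI_AAPCS; // stand-in default; the real code switches on OS/env
}

int main() {
  std::printf("%d %d %d\n",
              pickABI("aapcs16", false),   // explicit name -> AAPCS16 (3)
              pickABI("", true),           // watchOS triple -> AAPCS16 (3)
              pickABI("apcs-gnu", false)); // apcs* prefix  -> APCS    (1)
  return 0;
}
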
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// to 64. We always try to give them natural alignment.
if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS)
Ret += "-v64:32:64-v128:32:128";
- else
+ else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16)
Ret += "-v128:64:128";
// Try to align aggregates to 32 bits (the default is 64 bits, which has no
@@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit
// aligned everywhere else.
- if (TT.isOSNaCl())
+ if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16)
Ret += "-S128";
else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS)
Ret += "-S64";
@@ -184,6 +188,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType =
Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft;
+
+ // Default to triple-appropriate EABI
+ if (Options.EABIVersion == EABI::Default ||
+ Options.EABIVersion == EABI::Unknown) {
+ if (Subtarget.isTargetGNUAEABI())
+ this->Options.EABIVersion = EABI::GNU;
+ else
+ this->Options.EABIVersion = EABI::EABI5;
+ }
}
ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
@@ -225,12 +238,12 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
}
TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(ARMTTIImpl(this, F)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(ARMTTIImpl(this, F));
+ });
}
-
-void ARMTargetMachine::anchor() { }
+void ARMTargetMachine::anchor() {}
ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -244,7 +257,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT,
"support ARM mode execution!");
}
-void ARMLETargetMachine::anchor() { }
+void ARMLETargetMachine::anchor() {}
ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -253,7 +266,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void ARMBETargetMachine::anchor() { }
+void ARMBETargetMachine::anchor() {}
ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -262,7 +275,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-void ThumbTargetMachine::anchor() { }
+void ThumbTargetMachine::anchor() {}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -273,7 +286,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-void ThumbLETargetMachine::anchor() { }
+void ThumbLETargetMachine::anchor() {}
ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -282,7 +295,7 @@ ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
-void ThumbBETargetMachine::anchor() { }
+void ThumbBETargetMachine::anchor() {}
ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -348,7 +361,13 @@ bool ARMPassConfig::addPreISel() {
// tricky when doing code gen per function.
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
(EnableGlobalMerge == cl::BOU_UNSET);
- addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize));
+ // Merging of extern globals is enabled by default on non-Mach-O as we
+ // expect it to be generally either beneficial or harmless. On Mach-O it
+ // is disabled as we emit the .subsections_via_symbols directive which
+ // means that merging extern globals is not safe.
+ bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
+ addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize,
+ MergeExternalByDefault));
}
return false;
@@ -356,9 +375,6 @@ bool ARMPassConfig::addPreISel() {
bool ARMPassConfig::addInstSelector() {
addPass(createARMISelDag(getARMTargetMachine(), getOptLevel()));
-
- if (TM->getTargetTriple().isOSBinFormatELF() && TM->Options.EnableFastISel)
- addPass(createARMGlobalBaseRegPass());
return false;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
index 8c98e08..8ad1f3d 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h
@@ -26,7 +26,8 @@ public:
enum ARMABI {
ARM_ABI_UNKNOWN,
ARM_ABI_APCS,
- ARM_ABI_AAPCS // ARM EABI
+ ARM_ABI_AAPCS, // ARM EABI
+ ARM_ABI_AAPCS16
} TargetABI;
protected:
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2f194cf..c152011 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
-unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
@@ -47,12 +47,12 @@ unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 3;
}
-unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
// Single to/from double precision conversions.
- static const CostTblEntry<MVT::SimpleValueType> NEONFltDblTbl[] = {
+ static const CostTblEntry NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ ISD::FP_ROUND, MVT::v2f64, 2 },
{ ISD::FP_EXTEND, MVT::v2f32, 2 },
@@ -61,10 +61,9 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
ISD == ISD::FP_EXTEND)) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second);
- if (Idx != -1)
- return LT.first * NEONFltDblTbl[Idx].Cost;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
EVT SrcTy = TLI->getValueType(DL, Src);
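
The pattern above, repeated through the rest of this file, swaps the index-returning CostTableLookup for an overload that returns a pointer to the matching entry, so callers test the pointer instead of comparing an index against -1 and re-indexing the table. A standalone analogue of that idiom, not the real llvm/CostTable.h API (CostEntry and lookupCost are illustrative names):

#include <cstddef>
#include <cstdio>

// Standalone analogue of the pointer-returning lookup idiom; CostEntry and
// lookupCost are illustrative, not the real llvm/CostTable.h declarations.
struct CostEntry {
  int ISD;       // opcode key
  int Type;      // simple value type key
  unsigned Cost;
};

// Return a pointer to the matching entry, or nullptr. Callers write
//   if (const auto *Entry = lookupCost(Tbl, ISD, Ty)) return Entry->Cost;
// instead of checking an index against -1 and re-indexing the table.
template <std::size_t N>
static const CostEntry *lookupCost(const CostEntry (&Tbl)[N], int ISD,
                                   int Type) {
  for (const CostEntry &E : Tbl)
    if (E.ISD == ISD && E.Type == Type)
      return &E;
  return nullptr;
}

int main() {
  static const CostEntry Tbl[] = {{1, 10, 2}, {2, 10, 4}};
  if (const auto *Entry = lookupCost(Tbl, 2, 10))
    std::printf("cost = %u\n", Entry->Cost); // prints "cost = 4"
  return 0;
}
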
@@ -76,8 +75,7 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONVectorConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
@@ -153,15 +151,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isVector() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar float to integer conversions.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONFloatConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
@@ -184,15 +181,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONFloatConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar integer to float conversions.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
@@ -216,15 +212,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isInteger() && ST->hasNEON()) {
- int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return NEONIntegerConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
+ ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
// Scalar integer conversion costs.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- ARMIntegerConversionTbl[] = {
+ static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
@@ -236,17 +231,17 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
};
if (SrcTy.isInteger()) {
- int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return ARMIntegerConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index) {
+int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on swift.
if (ST->isSwift() &&
@@ -255,28 +250,30 @@ unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
ValTy->getScalarSizeInBits() <= 32)
return 3;
- // Cross-class copies are expensive on many microarchitectures,
- // so assume they are expensive by default.
if ((Opcode == Instruction::InsertElement ||
- Opcode == Instruction::ExtractElement) &&
- ValTy->getVectorElementType()->isIntegerTy())
- return 3;
+ Opcode == Instruction::ExtractElement)) {
+ // Cross-class copies are expensive on many microarchitectures,
+ // so assume they are expensive by default.
+ if (ValTy->getVectorElementType()->isIntegerTy())
+ return 3;
+
+ // Even if it's not a cross-class copy, this likely leads to mixing
+ // of NEON and VFP code and should therefore be penalized.
+ if (ValTy->isVectorTy() &&
+ ValTy->getScalarSizeInBits() <= 32)
+ return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
+ }
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
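
The restructured block above layers three penalties on top of the base cost: 3 for inserts that land in a D-subregister on Swift, 3 for integer element moves (cross-class copies), and a floor of 2 for other small-element cases that mix NEON and VFP code. A simplified sketch of that policy, with the opcode and subtarget checks collapsed into flags (vectorInstrCost, its parameters, and BaseCost are illustrative stand-ins for the real queries):

#include <algorithm>
#include <cstdio>

// Simplified sketch: the insert/extract opcode test and the Swift
// D-subregister test are collapsed into flags, and BaseCost stands in for
// BaseT::getVectorInstrCost; all names are illustrative.
static unsigned vectorInstrCost(bool SwiftDSubregInsert, bool IntegerElement,
                                unsigned ScalarBits, unsigned BaseCost) {
  if (SwiftDSubregInsert)
    return 3; // inserts into a D-subregister are ~3x slower on Swift
  if (IntegerElement)
    return 3; // cross-class (GPR <-> NEON) copies are expensive
  if (ScalarBits <= 32)
    return std::max(BaseCost, 2u); // penalize mixing NEON and VFP code
  return BaseCost;
}

int main() {
  std::printf("%u %u %u\n",
              vectorInstrCost(false, true, 32, 1),   // 3
              vectorInstrCost(false, false, 32, 1),  // 2
              vectorInstrCost(false, false, 64, 1)); // 1
  return 0;
}
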
-unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
// Lowering of some vector selects is currently far from perfect.
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- NEONVectorSelectTbl[] = {
- { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 },
- { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 },
- { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 },
+ static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
@@ -285,21 +282,20 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
- SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return NEONVectorSelectTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
+ SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT()))
+ return Entry->Cost;
}
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -314,7 +310,7 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return 1;
}
-unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+int ARMTTIImpl::getFPOpCost(Type *Ty) {
// Use similar logic that's in ARMISelLowering:
// Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
// to VFP.
@@ -333,14 +329,14 @@ unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
return TargetTransformInfo::TCC_Expensive;
}
-unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+ static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle cost one instruction if we are shuffling within a
// double word (vrev) or two if we shuffle a quad word (vrev, vext).
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
@@ -353,16 +349,16 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+ if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
- return LT.first * NEONShuffleTbl[Idx].Cost;
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
if (Kind == TTI::SK_Alternate) {
- static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+ static const CostTblEntry NEONAltShuffleTbl[] = {
// Alt shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
@@ -379,27 +375,26 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int Idx =
- CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
- return LT.first * NEONAltShuffleTbl[Idx].Cost;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned ARMTTIImpl::getArithmeticInstrCost(
+int ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
- static const CostTblEntry<MVT::SimpleValueType> CostTbl[] = {
+ static const CostTblEntry CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (added function call) is going to be very expensive.
@@ -440,16 +435,12 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
// Multiplication.
};
- int Idx = -1;
-
if (ST->hasNEON())
- Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second);
-
- if (Idx != -1)
- return LT.first * CostTbl[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
+ return LT.first * Entry->Cost;
- unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
- Opd1PropInfo, Opd2PropInfo);
+ int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
@@ -465,10 +456,9 @@ unsigned ARMTTIImpl::getArithmeticInstrCost(
return Cost;
}
-unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isDoubleTy()) {
@@ -479,21 +469,21 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return LT.first;
}
-unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace) {
+int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types of i64/f64 element.
- bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64;
+ bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy);
+ unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
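
The discounted cost above only applies when a vldN/vstN sequence is actually usable: the factor must be supported, the element type must not be 64 bits wide, and each de-interleaved sub-vector must be a legal 64- or 128-bit NEON type. A standalone sketch of that legality test (isCheapInterleave and its parameters are illustrative names; the real code queries DataLayout::getTypeSizeInBits and TLI->getMaxSupportedInterleaveFactor()):

#include <cstdio>

// Standalone sketch; isCheapInterleave and its parameters are illustrative,
// not LLVM API.
static bool isCheapInterleave(unsigned Factor, unsigned MaxFactor,
                              unsigned NumElts, unsigned EltSizeInBits) {
  if (Factor < 2 || Factor > MaxFactor)
    return false;
  if (EltSizeInBits == 64) // vldN/vstN have no 64-bit element forms
    return false;
  if (NumElts % Factor != 0)
    return false;
  // Each de-interleaved sub-vector must be a 64- or 128-bit NEON type.
  unsigned SubVecSizeInBits = (NumElts / Factor) * EltSizeInBits;
  return SubVecSizeInBits == 64 || SubVecSizeInBits == 128;
}

int main() {
  std::printf("%d\n", isCheapInterleave(2, 4, 8, 32)); // v8i32 split by 2 -> yes
  std::printf("%d\n", isCheapInterleave(3, 4, 8, 32)); // 8 % 3 != 0       -> no
  std::printf("%d\n", isCheapInterleave(2, 4, 4, 64)); // 64-bit elements  -> no
  return 0;
}
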
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 84f256f..7d8d238 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -41,7 +41,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMTargetLowering *getTLI() const { return TLI; }
public:
- explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F)
+ explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -52,11 +52,13 @@ public:
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
+ bool enableInterleavedAccessVectorization() { return true; }
+
/// \name Scalar TTI Implementations
/// @{
using BaseT::getIntImmCost;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
/// @}
@@ -92,34 +94,31 @@ public:
return 1;
}
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getAddressComputationCost(Type *Val, bool IsComplex);
+ int getAddressComputationCost(Type *Val, bool IsComplex);
- unsigned getFPOpCost(Type *Ty);
+ int getFPOpCost(Type *Ty);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
- unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
+ ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index cf6b892..c69a741 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -129,7 +129,6 @@ public:
};
class ARMAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
const MCRegisterInfo *MRI;
UnwindContext UC;
@@ -247,48 +246,49 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandVector &Operands);
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[ARM::ModeThumb];
+ return getSTI().getFeatureBits()[ARM::ModeThumb];
}
bool isThumbOne() const {
- return isThumb() && !STI.getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2];
}
bool isThumbTwo() const {
- return isThumb() && STI.getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2];
}
bool hasThumb() const {
- return STI.getFeatureBits()[ARM::HasV4TOps];
+ return getSTI().getFeatureBits()[ARM::HasV4TOps];
}
bool hasV6Ops() const {
- return STI.getFeatureBits()[ARM::HasV6Ops];
+ return getSTI().getFeatureBits()[ARM::HasV6Ops];
}
bool hasV6MOps() const {
- return STI.getFeatureBits()[ARM::HasV6MOps];
+ return getSTI().getFeatureBits()[ARM::HasV6MOps];
}
bool hasV7Ops() const {
- return STI.getFeatureBits()[ARM::HasV7Ops];
+ return getSTI().getFeatureBits()[ARM::HasV7Ops];
}
bool hasV8Ops() const {
- return STI.getFeatureBits()[ARM::HasV8Ops];
+ return getSTI().getFeatureBits()[ARM::HasV8Ops];
}
bool hasARM() const {
- return !STI.getFeatureBits()[ARM::FeatureNoARM];
+ return !getSTI().getFeatureBits()[ARM::FeatureNoARM];
}
- bool hasThumb2DSP() const {
- return STI.getFeatureBits()[ARM::FeatureDSPThumb2];
+ bool hasDSP() const {
+ return getSTI().getFeatureBits()[ARM::FeatureDSP];
}
bool hasD16() const {
- return STI.getFeatureBits()[ARM::FeatureD16];
+ return getSTI().getFeatureBits()[ARM::FeatureD16];
}
bool hasV8_1aOps() const {
- return STI.getFeatureBits()[ARM::HasV8_1aOps];
+ return getSTI().getFeatureBits()[ARM::HasV8_1aOps];
}
void SwitchMode() {
+ MCSubtargetInfo &STI = copySTI();
uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
setAvailableFeatures(FB);
}
bool isMClass() const {
- return STI.getFeatureBits()[ARM::FeatureMClass];
+ return getSTI().getFeatureBits()[ARM::FeatureMClass];
}
/// @name Auto-generated Match Functions
@@ -343,14 +343,15 @@ public:
Match_RequiresNotITBlock,
Match_RequiresV6,
Match_RequiresThumb2,
+ Match_RequiresV8,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "ARMGenAsmMatcher.inc"
};
- ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
+ ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : STI(STI), MII(MII), UC(Parser) {
+ : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) {
MCAsmParserExtension::Initialize(Parser);
// Cache the MCRegisterInfo.
@@ -564,87 +565,6 @@ class ARMOperand : public MCParsedAsmOperand {
public:
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
- ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
- Kind = o.Kind;
- StartLoc = o.StartLoc;
- EndLoc = o.EndLoc;
- switch (Kind) {
- case k_CondCode:
- CC = o.CC;
- break;
- case k_ITCondMask:
- ITMask = o.ITMask;
- break;
- case k_Token:
- Tok = o.Tok;
- break;
- case k_CCOut:
- case k_Register:
- Reg = o.Reg;
- break;
- case k_RegisterList:
- case k_DPRRegisterList:
- case k_SPRRegisterList:
- Registers = o.Registers;
- break;
- case k_VectorList:
- case k_VectorListAllLanes:
- case k_VectorListIndexed:
- VectorList = o.VectorList;
- break;
- case k_CoprocNum:
- case k_CoprocReg:
- Cop = o.Cop;
- break;
- case k_CoprocOption:
- CoprocOption = o.CoprocOption;
- break;
- case k_Immediate:
- Imm = o.Imm;
- break;
- case k_MemBarrierOpt:
- MBOpt = o.MBOpt;
- break;
- case k_InstSyncBarrierOpt:
- ISBOpt = o.ISBOpt;
- case k_Memory:
- Memory = o.Memory;
- break;
- case k_PostIndexRegister:
- PostIdxReg = o.PostIdxReg;
- break;
- case k_MSRMask:
- MMask = o.MMask;
- break;
- case k_BankedReg:
- BankedReg = o.BankedReg;
- break;
- case k_ProcIFlags:
- IFlags = o.IFlags;
- break;
- case k_ShifterImmediate:
- ShifterImm = o.ShifterImm;
- break;
- case k_ShiftedRegister:
- RegShiftedReg = o.RegShiftedReg;
- break;
- case k_ShiftedImmediate:
- RegShiftedImm = o.RegShiftedImm;
- break;
- case k_RotateImmediate:
- RotImm = o.RotImm;
- break;
- case k_ModifiedImmediate:
- ModImm = o.ModImm;
- break;
- case k_BitfieldDescriptor:
- Bitfield = o.Bitfield;
- break;
- case k_VectorIndex:
- VectorIndex = o.VectorIndex;
- break;
- }
- }
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const override { return StartLoc; }
@@ -4054,7 +3974,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
- if (!hasThumb2DSP() && (FlagsVal & 0x400))
+ if (!hasDSP() && (FlagsVal & 0x400))
// The _g and _nzcvqg versions are only valid if the DSP extension is
// available.
return MatchOperand_NoMatch;
@@ -5202,6 +5122,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// FALLTHROUGH
}
case AsmToken::Colon: {
+ S = Parser.getTok().getLoc();
// ":lower16:" and ":upper16:" expression prefixes
// FIXME: Check it's an expression prefix,
// e.g. (FOO - :lower16:BAR) isn't legal.
@@ -5220,8 +5141,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return false;
}
case AsmToken::Equal: {
+ S = Parser.getTok().getLoc();
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
- return Error(Parser.getTok().getLoc(), "unexpected token in operand");
+ return Error(S, "unexpected token in operand");
Parser.Lex(); // Eat '='
const MCExpr *SubExprVal;
@@ -5229,7 +5151,8 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ const MCExpr *CPLoc =
+ getTargetStreamer().addConstantPoolEntry(SubExprVal, S);
Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E));
return false;
}
@@ -5682,9 +5605,11 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
// VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
unsigned RegIdx = 3;
if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
- static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
+ (static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) {
if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
- static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
+ (static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32" ||
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f16"))
RegIdx = 4;
if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
@@ -8610,18 +8535,29 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
inITBlock())
return Match_RequiresNotITBlock;
+ } else if (isThumbOne()) {
+ // Some high-register supporting Thumb1 encodings only allow both registers
+ // to be from r0-r7 when in Thumb2.
+ if (Opc == ARM::tADDhirr && !hasV6MOps() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()))
+ return Match_RequiresThumb2;
+ // Others only require ARMv6 or later.
+ else if (Opc == ARM::tMOVr && !hasV6Ops() &&
+ isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()))
+ return Match_RequiresV6;
}
- // Some high-register supporting Thumb1 encodings only allow both registers
- // to be from r0-r7 when in Thumb2.
- else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() &&
- isARMLowRegister(Inst.getOperand(1).getReg()) &&
- isARMLowRegister(Inst.getOperand(2).getReg()))
- return Match_RequiresThumb2;
- // Others only require ARMv6 or later.
- else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() &&
- isARMLowRegister(Inst.getOperand(0).getReg()) &&
- isARMLowRegister(Inst.getOperand(1).getReg()))
- return Match_RequiresV6;
+
+ for (unsigned I = 0; I < MCID.NumOperands; ++I)
+ if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
+ // rGPRRegClass excludes PC, and also excluded SP before ARMv8
+ if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops())
+ return Match_RequiresV8;
+ else if (Inst.getOperand(I).getReg() == ARM::PC)
+ return Match_InvalidOperand;
+ }
+
return Match_Success;
}
@@ -8680,7 +8616,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return false;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature: {
assert(ErrorInfo && "Unknown missing feature!");
@@ -8720,6 +8656,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_RequiresV8:
+ return Error(IDLoc, "instruction variant requires ARMv8 or later");
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
@@ -8868,7 +8806,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
return false;
}
- getParser().getStreamer().EmitValue(Value, Size);
+ getParser().getStreamer().EmitValue(Value, Size, L);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -9098,7 +9036,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = ARMTargetParser::parseArch(Arch);
+ unsigned ID = ARM::parseArch(Arch);
if (ID == ARM::AK_INVALID) {
Error(L, "Unknown arch name");
@@ -9106,7 +9044,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
}
Triple T;
- STI.setDefaultFeatures(T.getARMCPUForArch(Arch));
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str());
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
getTargetStreamer().emitArch(ID);
@@ -9233,12 +9172,13 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// FIXME: This is using table-gen data, but should be moved to
// ARMTargetParser once that is table-gen'd.
- if (!STI.isCPUStringValid(CPU)) {
+ if (!getSTI().isCPUStringValid(CPU)) {
Error(L, "Unknown CPU name");
return false;
}
- STI.setDefaultFeatures(CPU);
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures(CPU, "");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
return false;
@@ -9249,13 +9189,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
SMLoc FPUNameLoc = getTok().getLoc();
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = ARMTargetParser::parseFPU(FPU);
+ unsigned ID = ARM::parseFPU(FPU);
std::vector<const char *> Features;
- if (!ARMTargetParser::getFPUFeatures(ID, Features)) {
+ if (!ARM::getFPUFeatures(ID, Features)) {
Error(FPUNameLoc, "Unknown FPU name");
return false;
}
+ MCSubtargetInfo &STI = copySTI();
for (auto Feature : Features)
STI.ApplyFeatureFlag(Feature);
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -9895,7 +9836,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
SMLoc ArchLoc = Parser.getTok().getLoc();
getLexer().Lex();
- unsigned ID = ARMTargetParser::parseArch(Arch);
+ unsigned ID = ARM::parseArch(Arch);
if (ID == ARM::AK_INVALID) {
Error(ArchLoc, "unknown architecture '" + Arch + "'");
@@ -9976,22 +9917,22 @@ extern "C" void LLVMInitializeARMAsmParser() {
// when we start to table-generate them, and we can use the ARM
// flags below, that were generated by table-gen.
static const struct {
- const ARM::ArchExtKind Kind;
- const unsigned ArchCheck;
+ const unsigned Kind;
+ const uint64_t ArchCheck;
const FeatureBitset Features;
} Extensions[] = {
{ ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
{ ARM::AEK_CRYPTO, Feature_HasV8,
{ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
{ ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
- { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass,
+ { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass,
{ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
{ ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
{ ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
- // FIXME: Also available in ARMv6-K
- { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} },
+ { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} },
// FIXME: Only available in A-class, isel not predicated
{ ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
+ { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} },
// FIXME: Unsupported extensions.
{ ARM::AEK_OS, Feature_None, {} },
{ ARM::AEK_IWMMXT, Feature_None, {} },
@@ -10020,7 +9961,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
EnableFeature = false;
Name = Name.substr(2);
}
- unsigned FeatureKind = ARMTargetParser::parseArchExt(Name);
+ unsigned FeatureKind = ARM::parseArchExt(Name);
if (FeatureKind == ARM::AEK_INVALID)
Error(ExtLoc, "unknown architectural extension: " + Name);
@@ -10037,6 +9978,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
return false;
}
+ MCSubtargetInfo &STI = copySTI();
FeatureBitset ToggleFeatures = EnableFeature
? (~STI.getFeatureBits() & Extension.Features)
: ( STI.getFeatureBits() & Extension.Features);
@@ -10078,6 +10020,10 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
"expression value must be representable in 32 bits");
}
break;
+ case MCK_rGPR:
+ if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP)
+ return Match_Success;
+ break;
case MCK_GPRPair:
if (Op.isReg() &&
MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
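
The ARMAsmParser hunks above drop the parser's own MCSubtargetInfo reference: read-only predicates go through getSTI(), and anything that mutates features (.cpu, .fpu, .arch_extension, SwitchMode) first calls copySTI(). A rough, self-contained sketch of that copy-on-write idea, using invented toy types rather than the real MC classes:

#include <bitset>
#include <cassert>
#include <memory>

// Toy stand-ins; the real types are MCSubtargetInfo / MCTargetAsmParser.
struct SubtargetInfo {
  std::bitset<64> FeatureBits;
};

class TargetAsmParserBase {
  const SubtargetInfo *STI;                // shared, read-only view
  std::unique_ptr<SubtargetInfo> OwnedSTI; // private copy once mutated
public:
  explicit TargetAsmParserBase(const SubtargetInfo &S) : STI(&S) {}

  // Read-only access, analogous to getSTI().
  const SubtargetInfo &getSTI() const { return *STI; }

  // Copy-on-write access, analogous to copySTI(): directives such as
  // ".arch_extension" mutate the copy, never the shared instance.
  SubtargetInfo &copySTI() {
    if (!OwnedSTI) {
      OwnedSTI = std::make_unique<SubtargetInfo>(*STI);
      STI = OwnedSTI.get();
    }
    return *OwnedSTI;
  }
};

int main() {
  SubtargetInfo Shared;
  TargetAsmParserBase P(Shared);
  P.copySTI().FeatureBits.set(3);        // e.g. toggling Thumb mode
  assert(!Shared.FeatureBits.test(3));   // the shared subtarget is untouched
  assert(P.getSTI().FeatureBits.test(3));
}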
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 097ec04..e63defe 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -59,7 +59,7 @@ namespace {
}
// Called when decoding an IT instruction. Sets the IT state for the following
- // instructions that for the IT block. Firstcond and Mask correspond to the
+ // instructions that for the IT block. Firstcond and Mask correspond to the
// fields in the IT instruction encoding.
void setITState(char Firstcond, char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
@@ -459,21 +459,18 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// VFP and NEON instructions, similarly, are shared between ARM
// and Thumb modes.
- MI.clear();
Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -485,7 +482,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
@@ -497,7 +493,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -509,7 +504,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -517,7 +511,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -525,7 +518,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -718,7 +710,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this,
STI);
if (Result) {
@@ -729,7 +720,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -763,7 +753,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Insn32 =
(Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16);
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -774,7 +763,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
Result =
decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -784,7 +772,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
- MI.clear();
Result =
decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -794,7 +781,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- MI.clear();
Result =
decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -803,7 +789,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 28, 4) == 0xE) {
- MI.clear();
Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this,
STI);
if (Result != MCDisassembler::Fail) {
@@ -814,7 +799,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) {
- MI.clear();
uint32_t NEONLdStInsn = Insn32;
NEONLdStInsn &= 0xF0FFFFFF;
NEONLdStInsn |= 0x04000000;
@@ -828,7 +812,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
if (fieldFromInstruction(Insn32, 24, 4) == 0xF) {
- MI.clear();
uint32_t NEONDataInsn = Insn32;
NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
@@ -841,7 +824,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
uint32_t NEONCryptoInsn = Insn32;
NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24
NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24
@@ -853,7 +835,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return Result;
}
- MI.clear();
uint32_t NEONv8Insn = Insn32;
NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26
Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address,
@@ -864,7 +845,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- MI.clear();
Size = 0;
return MCDisassembler::Fail;
}
@@ -902,7 +882,7 @@ static DecodeStatus
DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
-
+
if (RegNo == 15)
S = MCDisassembler::SoftFail;
@@ -986,8 +966,13 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
- if (RegNo == 13 || RegNo == 15)
+
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits();
+
+ if ((RegNo == 13 && !featureBits[ARM::HasV8Ops]) || RegNo == 15)
S = MCDisassembler::SoftFail;
+
Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
return S;
}
@@ -1147,7 +1132,7 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
unsigned imm = fieldFromInstruction(Val, 7, 5);
// Register-immediate
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ if (!Check(S, DecoderGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
ARM_AM::ShiftOpc Shift = ARM_AM::lsl;
@@ -1658,7 +1643,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::STRD_POST:
if (P == 0 && W == 1)
S = MCDisassembler::SoftFail;
-
+
if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
S = MCDisassembler::SoftFail;
if (type && Rm == 15)
@@ -4131,7 +4116,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
// indicates the move for the GE{3:0} bits, the mask{0} bit can be set
// only if the processor includes the DSP extension.
if (Mask == 0 || (Mask != 2 && ValLow > 3) ||
- (!(FeatureBits[ARM::FeatureDSPThumb2]) && (Mask & 1)))
+ (!(FeatureBits[ARM::FeatureDSP]) && (Mask & 1)))
S = MCDisassembler::SoftFail;
}
}
@@ -5065,6 +5050,10 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5075,10 +5064,35 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
- // VMOVv2f32 is ambiguous with these decodings.
- if (!(imm & 0x38) && cmode == 0xF) {
- if (op == 1) return MCDisassembler::Fail;
- Inst.setOpcode(ARM::VMOVv2f32);
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv2f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv1i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv8i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv2i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv2i32);
+ }
+ }
+ }
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
@@ -5095,6 +5109,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
+ const FeatureBitset &featureBits =
+ ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits();
+ bool hasFullFP16 = featureBits[ARM::FeatureFullFP16];
+
unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
@@ -5105,10 +5123,35 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
DecodeStatus S = MCDisassembler::Success;
- // VMOVv4f32 is ambiguous with these decodings.
- if (!(imm & 0x38) && cmode == 0xF) {
- if (op == 1) return MCDisassembler::Fail;
- Inst.setOpcode(ARM::VMOVv4f32);
+ // If the top 3 bits of imm are clear, this is a VMOV (immediate)
+ if (!(imm & 0x38)) {
+ if (cmode == 0xF) {
+ if (op == 1) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::VMOVv4f32);
+ }
+ if (hasFullFP16) {
+ if (cmode == 0xE) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMOVv2i64);
+ } else {
+ Inst.setOpcode(ARM::VMOVv16i8);
+ }
+ }
+ if (cmode == 0xD) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ if (cmode == 0xC) {
+ if (op == 1) {
+ Inst.setOpcode(ARM::VMVNv4i32);
+ } else {
+ Inst.setOpcode(ARM::VMOVv4i32);
+ }
+ }
+ }
return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
}
@@ -5132,7 +5175,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
unsigned Rm = fieldFromInstruction(Val, 0, 4);
Rm |= (fieldFromInstruction(Val, 23, 1) << 4);
unsigned Cond = fieldFromInstruction(Val, 28, 4);
-
+
if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt)
S = MCDisassembler::SoftFail;
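
The DecoderGPRRegisterClass change above starts consulting the subtarget's feature bits, so that SP (r13) only soft-fails on pre-ARMv8 cores, matching the relaxed rGPR class in ARMv8. A small sketch of that gating, with an invented feature index instead of the generated ARM::HasV8Ops value:

#include <bitset>
#include <cstdio>

enum Feature { HasV8Ops = 0, NumFeatures = 8 };   // invented indices
enum class DecodeStatus { Success, SoftFail, Fail };

// Mirrors the shape of DecoderGPRRegisterClass: r15 is always suspect,
// r13 only on cores without the ARMv8 relaxation.
DecodeStatus decodeRGPR(unsigned RegNo, const std::bitset<NumFeatures> &Feats) {
  if (RegNo > 15)
    return DecodeStatus::Fail;
  if ((RegNo == 13 && !Feats[HasV8Ops]) || RegNo == 15)
    return DecodeStatus::SoftFail;
  return DecodeStatus::Success;
}

int main() {
  std::bitset<NumFeatures> V7, V8;
  V8.set(HasV8Ops);
  std::printf("r13 on v7: %d\n", (int)decodeRGPR(13, V7)); // SoftFail
  std::printf("r13 on v8: %d\n", (int)decodeRGPR(13, V8)); // Success
}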
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 0bff521..33fc85a 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -804,7 +805,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
unsigned Opcode = MI->getOpcode();
// For writes, handle extended mask bits if the DSP extension is present.
- if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSPThumb2]) {
+ if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) {
switch (SYSm) {
case 0x400:
O << "apsr_g";
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 3927c9f..52f7115 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -15,12 +15,9 @@
#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class ARMInstPrinter : public MCInstPrinter {
public:
ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1114635..fa52c93 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -25,13 +25,17 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -180,9 +184,8 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
-bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const {
switch ((unsigned)Fixup.getKind()) {
case ARM::fixup_arm_thumb_br: {
// Relaxing tB to t2B. tB has a signed 12-bit displacement with the
@@ -192,7 +195,9 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
//
// Relax if the value is too big for a (signed) i8.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 2046 || Offset < -2048;
+ if (Offset > 2046 || Offset < -2048)
+ return "out of range pc-relative fixup value";
+ break;
}
case ARM::fixup_arm_thumb_bcc: {
// Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
@@ -202,23 +207,40 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
//
// Relax if the value is too big for a (signed) i8.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 254 || Offset < -256;
+ if (Offset > 254 || Offset < -256)
+ return "out of range pc-relative fixup value";
+ break;
}
case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_cp: {
// If the immediate is negative, greater than 1020, or not a multiple
// of four, the wide version of the instruction must be used.
int64_t Offset = int64_t(Value) - 4;
- return Offset > 1020 || Offset < 0 || Offset & 3;
+ if (Offset & 3)
+ return "misaligned pc-relative fixup value";
+ else if (Offset > 1020 || Offset < 0)
+ return "out of range pc-relative fixup value";
+ break;
}
- case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cb: {
// If we have a Thumb CBZ or CBNZ instruction and its target is the next
// instruction, it is actually out of range for the instruction.
// It will be changed to a NOP.
int64_t Offset = (Value & ~1);
- return Offset == 2;
+ if (Offset == 2)
+ return "will be converted to nop";
+ break;
}
- llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!");
+ default:
+ llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!");
+ }
+ return nullptr;
+}
+
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ return reasonForFixupRelaxation(Fixup, Value);
}
void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
@@ -317,9 +339,10 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
return Value;
}
-static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx,
- bool IsLittleEndian) {
+unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ bool IsPCRel, MCContext *Ctx,
+ bool IsLittleEndian,
+ bool IsResolved) const {
unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
@@ -372,8 +395,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
isAdd = false;
}
- if (Ctx && Value >= 4096)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 4096) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10,
@@ -383,8 +408,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
- case ARM::fixup_thumb_adr_pcrel_10:
- return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_adr_pcrel_12: {
// ARM PC-relative values are offset by 8.
Value -= 8;
@@ -393,8 +416,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
opc = 2; // 0b0010
}
- if (Ctx && ARM_AM::getSOImmVal(Value) == -1)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && ARM_AM::getSOImmVal(Value) == -1) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
// Encode the immediate and shift the opcode into place.
return ARM_AM::getSOImmVal(Value) | (opc << 21);
}
@@ -517,21 +542,44 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
((uint16_t)imm10LBits) << 1);
return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
+ case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_cp:
- // Offset by 4, and don't encode the low two bits. Two bytes of that
- // 'off by 4' is implicitly handled by the half-word ordering of the
- // Thumb encoding, so we only need to adjust by 2 here.
- return ((Value - 2) >> 2) & 0xff;
+ // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
+ // could have an error on our hands.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
+ // Offset by 4, and don't encode the low two bits.
+ return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_thumb_cb: {
// Offset by 4 and don't encode the lower bit, which is always 0.
+ // FIXME: diagnose if no Thumb2
uint32_t Binary = (Value - 4) >> 1;
return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3);
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
+ if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
+ if (FixupDiagnostic) {
+ Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ return 0;
+ }
+ }
return ((Value - 4) >> 1) & 0xff;
case ARM::fixup_arm_pcrel_10_unscaled: {
Value = Value - 8; // ARM fixups offset by an additional word and don't
@@ -542,8 +590,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- if (Ctx && Value >= 256)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
return Value | (isAdd << 23);
}
@@ -561,8 +611,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- if (Ctx && Value >= 256)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Ctx && Value >= 256) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
Value |= isAdd << 23;
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
@@ -582,6 +634,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
const MCValue &Target, uint64_t &Value,
bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
+ const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
// Some fixups to thumb function symbols need the low bit (thumb bit)
// twiddled.
if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
@@ -590,18 +643,21 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
(unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
(unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
- if (A) {
- const MCSymbol &Sym = A->getSymbol();
- if (Asm.isThumbFunc(&Sym))
+ if (Sym) {
+ if (Asm.isThumbFunc(Sym))
Value |= 1;
}
}
- // For Thumb1 BL instruction, it is possible to be a long jump between
- // the basic blocks of the same function. Thus, we would like to resolve
- // the offset when the destination has the same MCFragment.
- if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
- const MCSymbol &Sym = A->getSymbol();
- IsResolved = (Sym.getFragment() == DF);
+ if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
+ assert(Sym && "How did we resolve this?");
+
+ // If the symbol is external the linker will handle it.
+ // FIXME: Should we handle it as an optimization?
+
+ // If the symbol is out of range, produce a relocation and hope the
+ // linker can handle it. GNU AS produces an error in this case.
+ if (Sym->isExternal() || Value >= 0x400004)
+ IsResolved = false;
}
// We must always generate a relocation for BL/BLX instructions if we have
// a symbol to reference, as the linker relies on knowing the destination
@@ -616,7 +672,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the instruction. This allows adjustFixupValue() to issue a diagnostic
// if the value is invalid.
(void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
- IsLittleEndian);
+ IsLittleEndian, IsResolved);
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -719,7 +775,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian);
+ Value =
+ adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true);
if (!Value)
return; // Doesn't change encoding.
@@ -743,6 +800,249 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
+namespace CU {
+
+/// \brief Compact unwind encoding values.
+enum CompactUnwindEncodings {
+ UNWIND_ARM_MODE_MASK = 0x0F000000,
+ UNWIND_ARM_MODE_FRAME = 0x01000000,
+ UNWIND_ARM_MODE_FRAME_D = 0x02000000,
+ UNWIND_ARM_MODE_DWARF = 0x04000000,
+
+ UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000,
+
+ UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002,
+ UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004,
+
+ UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040,
+ UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080,
+
+ UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000F00,
+
+ UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF
+};
+
+} // end CU namespace
+
+/// Generate compact unwind encoding for the function based on the CFI
+/// instructions. If the CFI instructions describe a frame that cannot be
+/// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which
+/// tells the runtime to fallback and unwind using dwarf.
+uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n");
+ // Only armv7k uses CFI based unwinding.
+ if (Subtype != MachO::CPU_SUBTYPE_ARM_V7K)
+ return 0;
+ // No .cfi directives means no frame.
+ if (Instrs.empty())
+ return 0;
+ // Start off assuming CFA is at SP+0.
+ int CFARegister = ARM::SP;
+ int CFARegisterOffset = 0;
+ // Mark savable registers as initially unsaved
+ DenseMap<unsigned, int> RegOffsets;
+ int FloatRegCount = 0;
+ // Process each .cfi directive and build up compact unwind info.
+ for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
+ int Reg;
+ const MCCFIInstruction &Inst = Instrs[i];
+ switch (Inst.getOperation()) {
+ case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa
+ CFARegisterOffset = -Inst.getOffset();
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset
+ CFARegisterOffset = -Inst.getOffset();
+ break;
+ case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register
+ CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ break;
+ case MCCFIInstruction::OpOffset: // DW_CFA_offset
+ Reg = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ RegOffsets[Reg] = Inst.getOffset();
+ else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ RegOffsets[Reg] = Inst.getOffset();
+ ++FloatRegCount;
+ } else {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << ".cfi_offset on unknown register="
+ << Inst.getRegister() << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ break;
+ case MCCFIInstruction::OpRelOffset: // DW_CFA_advance_loc
+ // Ignore
+ break;
+ default:
+ // Directive not convertible to compact unwind, bail out.
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "CFI directive not compatiable with comact "
+ "unwind encoding, opcode=" << Inst.getOperation()
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ break;
+ }
+ }
+
+ // If no frame set up, return no unwind info.
+ if ((CFARegister == ARM::SP) && (CFARegisterOffset == 0))
+ return 0;
+
+ // Verify standard frame (lr/r7) was used.
+ if (CFARegister != ARM::R7) {
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is "
+ << CFARegister
+ << " instead of r7\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ int StackAdjust = CFARegisterOffset - 8;
+ if (RegOffsets.lookup(ARM::LR) != (-4 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs()
+ << "LR not saved as standard frame, StackAdjust="
+ << StackAdjust
+ << ", CFARegisterOffset=" << CFARegisterOffset
+ << ", lr save at offset=" << RegOffsets[14] << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ if (RegOffsets.lookup(ARM::R7) != (-8 - StackAdjust)) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "r7 not saved as standard frame\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ uint32_t CompactUnwindEncoding = CU::UNWIND_ARM_MODE_FRAME;
+
+ // If var-args are used, there may be a stack adjust required.
+ switch (StackAdjust) {
+ case 0:
+ break;
+ case 4:
+ CompactUnwindEncoding |= 0x00400000;
+ break;
+ case 8:
+ CompactUnwindEncoding |= 0x00800000;
+ break;
+ case 12:
+ CompactUnwindEncoding |= 0x00C00000;
+ break;
+ default:
+ DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs()
+ << ".cfi_def_cfa stack adjust ("
+ << StackAdjust << ") out of range\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // If r6 is saved, it must be right below r7.
+ static struct {
+ unsigned Reg;
+ unsigned Encoding;
+ } GPRCSRegs[] = {{ARM::R6, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R6},
+ {ARM::R5, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R5},
+ {ARM::R4, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R4},
+ {ARM::R12, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R12},
+ {ARM::R11, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R11},
+ {ARM::R10, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R10},
+ {ARM::R9, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R9},
+ {ARM::R8, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R8}};
+
+ int CurOffset = -8 - StackAdjust;
+ for (auto CSReg : GPRCSRegs) {
+ auto Offset = RegOffsets.find(CSReg.Reg);
+ if (Offset == RegOffsets.end())
+ continue;
+
+ int RegOffset = Offset->second;
+ if (RegOffset != CurOffset - 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << MRI.getName(CSReg.Reg) << " saved at "
+ << RegOffset << " but only supported at "
+ << CurOffset << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CompactUnwindEncoding |= CSReg.Encoding;
+ CurOffset -= 4;
+ }
+
+ // If no floats saved, we are done.
+ if (FloatRegCount == 0)
+ return CompactUnwindEncoding;
+
+ // Switch mode to include D register saving.
+ CompactUnwindEncoding &= ~CU::UNWIND_ARM_MODE_MASK;
+ CompactUnwindEncoding |= CU::UNWIND_ARM_MODE_FRAME_D;
+
+ // FIXME: supporting more than 4 saved D-registers compactly would be trivial,
+ // but needs coordination with the linker and libunwind.
+ if (FloatRegCount > 4) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << "unsupported number of D registers saved ("
+ << FloatRegCount << ")\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+
+ // Floating point registers must either be saved sequentially, or we defer to
+ // DWARF. No gaps allowed here so check that each saved d-register is
+ // precisely where it should be.
+ static unsigned FPRCSRegs[] = { ARM::D8, ARM::D10, ARM::D12, ARM::D14 };
+ for (int Idx = FloatRegCount - 1; Idx >= 0; --Idx) {
+ auto Offset = RegOffsets.find(FPRCSRegs[Idx]);
+ if (Offset == RegOffsets.end()) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " not saved\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ } else if (Offset->second != CurOffset - 8) {
+ DEBUG_WITH_TYPE("compact-unwind",
+ llvm::dbgs() << FloatRegCount << " D-regs saved, but "
+ << MRI.getName(FPRCSRegs[Idx])
+ << " saved at " << Offset->second
+ << ", expected at " << CurOffset - 8
+ << "\n");
+ return CU::UNWIND_ARM_MODE_DWARF;
+ }
+ CurOffset -= 8;
+ }
+
+ return CompactUnwindEncoding | ((FloatRegCount - 1) << 8);
+}
+
+static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
+ unsigned AK = ARM::parseArch(Arch);
+ switch (AK) {
+ default:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV4T:
+ return MachO::CPU_SUBTYPE_ARM_V4T;
+ case ARM::AK_ARMV5T:
+ case ARM::AK_ARMV5TE:
+ case ARM::AK_ARMV5TEJ:
+ return MachO::CPU_SUBTYPE_ARM_V5;
+ case ARM::AK_ARMV6:
+ case ARM::AK_ARMV6K:
+ return MachO::CPU_SUBTYPE_ARM_V6;
+ case ARM::AK_ARMV7A:
+ return MachO::CPU_SUBTYPE_ARM_V7;
+ case ARM::AK_ARMV7S:
+ return MachO::CPU_SUBTYPE_ARM_V7S;
+ case ARM::AK_ARMV7K:
+ return MachO::CPU_SUBTYPE_ARM_V7K;
+ case ARM::AK_ARMV6M:
+ return MachO::CPU_SUBTYPE_ARM_V6M;
+ case ARM::AK_ARMV7M:
+ return MachO::CPU_SUBTYPE_ARM_V7M;
+ case ARM::AK_ARMV7EM:
+ return MachO::CPU_SUBTYPE_ARM_V7EM;
+ }
+}
+
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
@@ -751,19 +1051,8 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
default:
llvm_unreachable("unsupported object format");
case Triple::MachO: {
- MachO::CPUSubTypeARM CS =
- StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
- .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
- .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ)
- .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6)
- .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M)
- .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM)
- .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K)
- .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M)
- .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S)
- .Default(MachO::CPU_SUBTYPE_ARM_V7);
-
- return new ARMAsmBackendDarwin(T, TheTriple, CS);
+ MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
+ return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
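
The new generateCompactUnwindEncoding() in the ARMAsmBackend.cpp hunks above walks the .cfi directives and packs a standard r7 frame into a single 32-bit word built from the CU:: constants it defines. As a rough illustration only (reusing those constants, but hand-composing the word for one common prologue rather than driving real MCCFIInstructions), a frame that pushes {r4, r5, r6, r7, lr} with no extra stack adjust comes out as 0x01000007:

#include <cstdint>
#include <cstdio>

// Constants copied from the CU namespace introduced in the patch.
enum : uint32_t {
  UNWIND_ARM_MODE_FRAME          = 0x01000000,
  UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001,
  UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002,
  UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004,
};

int main() {
  // push {r4, r5, r6, r7, lr}; mov r7, sp  -> frame mode, no stack adjust.
  // r7 and lr are implied by the frame mode itself, so only r4-r6 get bits.
  uint32_t Encoding = UNWIND_ARM_MODE_FRAME;
  Encoding |= UNWIND_ARM_FRAME_FIRST_PUSH_R4 |
              UNWIND_ARM_FRAME_FIRST_PUSH_R5 |
              UNWIND_ARM_FRAME_FIRST_PUSH_R6;
  std::printf("compact unwind word: 0x%08X\n", Encoding); // 0x01000007
}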
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 6b4abd5..28a6213 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -45,6 +45,10 @@ public:
const MCValue &Target, uint64_t &Value,
bool &IsResolved) override;
+ unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
+ MCContext *Ctx, bool IsLittleEndian,
+ bool IsResolved) const;
+
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
@@ -52,6 +56,9 @@ public:
bool mayNeedRelaxation(const MCInst &Inst) const override;
+ const char *reasonForFixupRelaxation(const MCFixup &Fixup,
+ uint64_t Value) const;
+
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override;
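
The header change above splits fixupNeedsRelaxation(): the actual range check lives in reasonForFixupRelaxation(), which returns nullptr when the narrow encoding fits and a diagnostic string otherwise, so adjustFixupValue() can reuse the same check to emit a real error on Thumb1-only targets. A stripped-down sketch of that contract for the fixup_arm_thumb_br case (signed 12-bit displacement, PC bias of 4), with none of the MC plumbing:

#include <cstdint>
#include <cstdio>

// Returns nullptr if a 16-bit tB can encode the branch, else a reason string
// (the same contract as reasonForFixupRelaxation for fixup_arm_thumb_br).
const char *reasonForThumbBrRelaxation(uint64_t Value) {
  int64_t Offset = int64_t(Value) - 4;          // Thumb PC bias
  if (Offset > 2046 || Offset < -2048)
    return "out of range pc-relative fixup value";
  return nullptr;
}

int main() {
  std::printf("%s\n", reasonForThumbBrRelaxation(100)  ? "relax" : "fits");
  std::printf("%s\n", reasonForThumbBrRelaxation(5000) ? "relax" : "fits");
}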
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index a6206e3..995dd0f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -16,11 +16,12 @@ using namespace llvm;
namespace {
class ARMAsmBackendDarwin : public ARMAsmBackend {
+ const MCRegisterInfo &MRI;
public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const Triple &TT,
- MachO::CPUSubTypeARM st)
- : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) {
+ const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
+ : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
HasDataInCodeSupport = true;
}
@@ -28,6 +29,9 @@ public:
return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
Subtype);
}
+
+ uint32_t generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const override;
};
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 804d353..52eba8be 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -95,7 +95,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTTPOFF:
Type = ELF::R_ARM_TLS_IE32;
break;
- case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
Type = ELF::R_ARM_GOT_PREL;
break;
}
@@ -192,7 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTOFF:
Type = ELF::R_ARM_GOTOFF32;
break;
- case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_ARM_GOT_PREL:
Type = ELF::R_ARM_GOT_PREL;
break;
case MCSymbolRefExpr::VK_ARM_TARGET1:
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index d17fdb9..6084f22 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -79,7 +79,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitAttribute(unsigned Attribute, unsigned Value) override;
void emitTextAttribute(unsigned Attribute, StringRef String) override;
void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
- StringRef StrinValue) override;
+ StringRef StringValue) override;
void emitArch(unsigned Arch) override;
void emitArchExtension(unsigned ArchExt) override;
void emitObjectArch(unsigned Arch) override;
@@ -195,16 +195,16 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
OS << "\n";
}
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
- OS << "\t.arch\t" << ARMTargetParser::getArchName(Arch) << "\n";
+ OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
}
void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
- OS << "\t.arch_extension\t" << ARMTargetParser::getArchExtName(ArchExt) << "\n";
+ OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
}
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
- OS << "\t.object_arch\t" << ARMTargetParser::getArchName(Arch) << '\n';
+ OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
}
void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
- OS << "\t.fpu\t" << ARMTargetParser::getFPUName(FPU) << "\n";
+ OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
}
void ARMTargetAsmStreamer::finishAttributeSection() {
}
@@ -243,7 +243,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset,
class ARMTargetELFStreamer : public ARMTargetStreamer {
private:
// This structure holds all attributes, accounting for
- // their string/numeric value, so we can later emmit them
+ // their string/numeric value, so we can later emit them
// in declaration order, keeping all in the same vector
struct AttributeItem {
enum {
@@ -254,7 +254,7 @@ private:
} Type;
unsigned Tag;
unsigned IntValue;
- StringRef StringValue;
+ std::string StringValue;
static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) {
// The conformance tag must be emitted first when serialised
@@ -507,14 +507,15 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
- if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4))
- getContext().reportFatalError(Loc, "relocated expression must be 32-bit");
+ if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
+ getContext().reportError(Loc, "relocated expression must be 32-bit");
+ return;
+ }
EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size);
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
void EmitAssemblerFlag(MCAssemblerFlag Flag) override {
@@ -684,16 +685,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
setAttributeItem(CPU_name,
- ARMTargetParser::getCPUAttr(Arch),
+ ARM::getCPUAttr(Arch),
false);
if (EmittedArch == ARM::AK_INVALID)
setAttributeItem(CPU_arch,
- ARMTargetParser::getArchAttr(Arch),
+ ARM::getArchAttr(Arch),
false);
else
setAttributeItem(CPU_arch,
- ARMTargetParser::getArchAttr(EmittedArch),
+ ARM::getArchAttr(EmittedArch),
false);
switch (Arch) {
@@ -702,7 +703,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV3:
case ARM::AK_ARMV3M:
case ARM::AK_ARMV4:
- case ARM::AK_ARMV5:
setAttributeItem(ARM_ISA_use, Allowed, false);
break;
@@ -710,7 +710,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV5T:
case ARM::AK_ARMV5TE:
case ARM::AK_ARMV6:
- case ARM::AK_ARMV6J:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
@@ -721,8 +720,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
case ARM::AK_ARMV6K:
- case ARM::AK_ARMV6Z:
- case ARM::AK_ARMV6ZK:
+ case ARM::AK_ARMV6KZ:
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, Allowed, false);
setAttributeItem(Virtualization_use, AllowTZ, false);
@@ -732,10 +730,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
setAttributeItem(THUMB_ISA_use, Allowed, false);
break;
- case ARM::AK_ARMV7:
- setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
- break;
-
case ARM::AK_ARMV7A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
@@ -755,6 +749,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::AK_ARMV8A:
case ARM::AK_ARMV8_1A:
+ case ARM::AK_ARMV8_2A:
setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
setAttributeItem(ARM_ISA_use, Allowed, false);
setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1084,19 +1079,14 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
- SwitchToEHSection(".ARM.extab",
- ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
- SectionKind::getDataRel(),
- FnStart);
+ SwitchToEHSection(".ARM.extab", ELF::SHT_PROGBITS, ELF::SHF_ALLOC,
+ SectionKind::getData(), FnStart);
}
inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
- SwitchToEHSection(".ARM.exidx",
- ELF::SHT_ARM_EXIDX,
+ SwitchToEHSection(".ARM.exidx", ELF::SHT_ARM_EXIDX,
ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
- SectionKind::getDataRel(),
- FnStart);
+ SectionKind::getData(), FnStart);
}
void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
MCDataFragment *Frag = getOrCreateDataFragment();
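
One easy-to-miss fix in the ARMELFStreamer.cpp hunks above is AttributeItem::StringValue changing from StringRef to std::string: attribute text can now come from a buffer that does not outlive the streamer (for example a string assembled while parsing a directive) without dangling. A minimal illustration of the ownership difference, with a toy attribute record rather than the real streamer:

#include <cstdio>
#include <string>
#include <vector>

// Toy attribute record; the real one lives in ARMTargetELFStreamer.
struct AttributeItem {
  unsigned Tag;
  std::string StringValue;   // owning copy; a non-owning StringRef here
                             // would dangle once the source buffer dies
};

static std::vector<AttributeItem> Attributes;

void setTextAttribute(unsigned Tag, const std::string &Value) {
  Attributes.push_back({Tag, Value});   // safe: the vector owns its copy
}

int main() {
  {
    std::string Temp = std::string("cortex-") + "a53"; // dies at end of scope
    setTextAttribute(5, Temp);
  }
  std::printf("%s\n", Attributes[0].StringValue.c_str()); // still valid
}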
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 1ac0815..bda37f6 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -33,7 +33,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::SjLj;
+ ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS()
+ ? ExceptionHandling::SjLj
+ : ExceptionHandling::DwarfCFI;
UseIntegratedAssembler = true;
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 99a5fff..5e54816 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -19,34 +19,37 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
-
- class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
- public:
- explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
- };
-
- class ARMELFMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit ARMELFMCAsmInfo(const Triple &TT);
-
- void setUseIntegratedAssembler(bool Value) override;
- };
-
- class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
- void anchor() override;
- public:
- explicit ARMCOFFMCAsmInfoMicrosoft();
- };
-
- class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
- void anchor() override;
- public:
- explicit ARMCOFFMCAsmInfoGNU();
- };
+class Triple;
+
+class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
+};
+
+class ARMELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit ARMELFMCAsmInfo(const Triple &TT);
+
+ void setUseIntegratedAssembler(bool Value) override;
+};
+
+class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoMicrosoft();
+};
+
+class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
+ void anchor() override;
+
+public:
+ explicit ARMCOFFMCAsmInfoGNU();
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 9146d4d..75dde80 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -63,8 +63,8 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
// There are no TLS ARMMCExprs at the moment.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 21c9fc1..8c8c249 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -134,101 +135,11 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) {
bool isThumb =
TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb;
- bool NoCPU = CPU == "generic" || CPU.empty();
std::string ARMArchFeature;
- switch (TT.getSubArch()) {
- default:
- llvm_unreachable("invalid sub-architecture for ARM");
- case Triple::ARMSubArch_v8:
- if (NoCPU)
- // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
- // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
- // FeatureT2XtPk, FeatureCrypto, FeatureCRC
- ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
- "+trustzone,+t2xtpk,+crypto,+crc";
- else
- // Use CPU to figure out the exact features
- ARMArchFeature = "+v8";
- break;
- case Triple::ARMSubArch_v8_1a:
- if (NoCPU)
- // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
- // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
- // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a
- ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
- "+trustzone,+t2xtpk,+crypto,+crc";
- else
- // Use CPU to figure out the exact features
- ARMArchFeature = "+v8.1a";
- break;
- case Triple::ARMSubArch_v7m:
- isThumb = true;
- if (NoCPU)
- // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7em:
- if (NoCPU)
- // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
- // FeatureT2XtPk, FeatureMClass
- ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7s:
- if (NoCPU)
- // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS
- // Swift
- ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v7:
- // v7 CPUs have lots of different feature sets. If no CPU is specified,
- // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return
- // the "minimum" feature set and use CPU string to figure out the exact
- // features.
- if (NoCPU)
- // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk
- ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk";
- else
- // Use CPU to figure out the exact features.
- ARMArchFeature = "+v7";
- break;
- case Triple::ARMSubArch_v6t2:
- ARMArchFeature = "+v6t2";
- break;
- case Triple::ARMSubArch_v6k:
- ARMArchFeature = "+v6k";
- break;
- case Triple::ARMSubArch_v6m:
- isThumb = true;
- if (NoCPU)
- // v6m: FeatureNoARM, FeatureMClass
- ARMArchFeature = "+v6m,+noarm,+mclass";
- else
- ARMArchFeature = "+v6";
- break;
- case Triple::ARMSubArch_v6:
- ARMArchFeature = "+v6";
- break;
- case Triple::ARMSubArch_v5te:
- ARMArchFeature = "+v5te";
- break;
- case Triple::ARMSubArch_v5:
- ARMArchFeature = "+v5t";
- break;
- case Triple::ARMSubArch_v4t:
- ARMArchFeature = "+v4t";
- break;
- case Triple::NoSubArch:
- break;
- }
+
+ unsigned ArchID = ARM::parseArch(TT.getArchName());
+ if (ArchID != ARM::AK_INVALID && (CPU.empty() || CPU == "generic"))
+ ARMArchFeature = (ARMArchFeature + "+" + ARM::getArchName(ArchID)).str();
if (isThumb) {
if (ARMArchFeature.empty())
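
The rewritten ParseARMTriple above drops the hand-maintained per-subarch switch and derives the architecture feature from TargetParser instead. A minimal sketch of the new mapping, assuming the TargetParser API at this revision (ARM::parseArch / ARM::getArchName); the helper archFeatureForTriple is illustrative only:

#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/TargetParser.h"
#include <string>

// Map the triple's architecture name to a single "+<name>" subtarget feature.
// A specific CPU name overrides this, so the feature is only synthesized for
// "generic" or empty CPU strings, mirroring the patched code.
static std::string archFeatureForTriple(const llvm::Triple &TT,
                                        llvm::StringRef CPU) {
  unsigned ArchID = llvm::ARM::parseArch(TT.getArchName());
  if (ArchID == llvm::ARM::AK_INVALID || !(CPU.empty() || CPU == "generic"))
    return std::string();
  llvm::StringRef Name = llvm::ARM::getArchName(ArchID);
  return ("+" + Name).str();
}
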
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index fd30623..c2bbc8e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -86,7 +86,8 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T,
// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll);
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IncrementalLinkerCompatible);
/// Construct an ELF Mach-O object writer.
MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 95d7ea7..cfd504e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -150,10 +150,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint32_t Value2 = 0;
@@ -163,10 +165,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_HALF_SECTDIFF;
@@ -251,10 +255,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
@@ -265,10 +271,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(Fixup.getLoc(),
"symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
+ return;
+ }
// Select the appropriate difference relocation type.
Type = MachO::ARM_RELOC_SECTDIFF;
@@ -346,13 +354,15 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
unsigned RelocType = MachO::ARM_RELOC_VANILLA;
- if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
// If we failed to get fixup kind info, it's because there's no legal
// relocation type for the fixup kind. This happens when it's a fixup that's
// expected to always be resolvable at assembly time and not have any
// relocations needed.
- Asm.getContext().reportFatalError(Fixup.getLoc(),
- "unsupported relocation on symbol");
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
+ return;
+ }
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
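
The relocation-writer changes above replace reportFatalError with reportError plus an early return, so a bad fixup produces a diagnostic instead of aborting the whole assembly. A minimal sketch of that pattern, assuming only MCContext::reportError and MCSymbol::getFragment from this revision (checkSymbolDefined is an illustrative helper, not part of the patch):

#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/SMLoc.h"

// Report a recoverable error at the fixup's source location; the caller
// returns early, emission continues, and all errors are surfaced together.
static bool checkSymbolDefined(llvm::MCContext &Ctx, const llvm::MCSymbol &Sym,
                               llvm::SMLoc Loc) {
  if (!Sym.getFragment()) {
    Ctx.reportError(Loc, "symbol '" + Sym.getName() +
                             "' can not be undefined in a subtraction expression");
    return false;
  }
  return true;
}
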
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index b680db5..dad50f2 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -27,8 +27,8 @@ ARMTargetStreamer::~ARMTargetStreamer() {}
// The constant pool handling is shared by all ARMTargetStreamer
// implementations.
-const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
- return ConstantPools->addEntry(Streamer, Expr, 4);
+const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc) {
+ return ConstantPools->addEntry(Streamer, Expr, 4, Loc);
}
void ARMTargetStreamer::emitCurrentConstantPool() {
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index b993b1b..83fa084 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -37,11 +37,11 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
}
-MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll) {
- return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+MCStreamer *llvm::createARMWinCOFFStreamer(
+ MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) {
+ auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
+ return S;
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 3b4358b..93e0ac4 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -13,6 +13,7 @@
#include "Thumb1FrameLowering.h"
#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -84,7 +85,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -100,7 +100,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned BasePtr = RegInfo->getBaseRegister();
int CFAOffset = 0;
@@ -168,8 +172,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
++MBBI;
- if (MBBI != MBB.end())
- dl = MBBI->getDebugLoc();
}
// Determine starting offsets of spill areas.
@@ -232,11 +234,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
}
-
// Adjust FP so it point to the stack slot that contains the previous FP.
if (HasFP) {
- FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI)
- + GPRCS1Size + ArgRegsSaveSize;
+ FramePtrOffsetInBlock +=
+ MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
@@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert((MBBI->getOpcode() == ARM::tBX_RET ||
- MBBI->getOpcode() == ARM::tPOP_RET) &&
- "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ThumbRegisterInfo *RegInfo =
@@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
ARM::SP)
.addReg(FramePtr));
} else {
- if (MBBI->getOpcode() == ARM::tBX_RET &&
- &MBB.front() != MBBI &&
- std::prev(MBBI)->getOpcode() == ARM::tPOP) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
+ &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes))
emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes);
@@ -388,66 +385,189 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- bool IsV4PopReturn = false;
- for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo())
+ if (needPopSpecialFixUp(MF)) {
+ bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true);
+ (void)Done;
+ assert(Done && "Emission of the special fixup failed!?");
+ }
+}
+
+bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ if (!needPopSpecialFixUp(*MBB.getParent()))
+ return true;
+
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+ return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false);
+}
+
+bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
+ ARMFunctionInfo *AFI =
+ const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>();
+ if (AFI->getArgRegsSaveSize())
+ return true;
+
+ // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
if (CSI.getReg() == ARM::LR)
- IsV4PopReturn = true;
- IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps();
-
- // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
- // to LR, and we can't pop the value directly to the PC since
- // we need to update the SP after popping the value. So instead
- // we have to emit:
- // POP {r3}
- // ADD sp, #offset
- // BX r3
- // If this would clobber a return value, then generate this sequence instead:
- // MOV ip, r3
- // POP {r3}
- // ADD sp, #offset
- // MOV lr, r3
- // MOV r3, ip
- // BX lr
- if (ArgRegsSaveSize || IsV4PopReturn) {
- // Get the last instruction, tBX_RET
- MBBI = MBB.getLastNonDebugInstr();
- assert (MBBI->getOpcode() == ARM::tBX_RET);
- DebugLoc dl = MBBI->getDebugLoc();
-
- if (AFI->getReturnRegsCount() <= 3) {
- // Epilogue: pop saved LR to R3 and branch off it.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
-
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
-
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX))
- .addReg(ARM::R3, RegState::Kill);
- AddDefaultPred(MIB);
- MIB.copyImplicitOps(&*MBBI);
- // erase the old tBX_RET instruction
- MBB.erase(MBBI);
- } else {
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ return true;
+
+ return false;
+}
+
+bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
+ bool DoIt) const {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ThumbRegisterInfo *RegInfo =
+ static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
- .addReg(ARM::R3, RegState::Define);
+ // If MBBI is a return instruction, or is a tPOP followed by a return
+ // instruction in the successor BB, we may be able to directly restore
+ // LR in the PC.
+ // This is only possible with v5T ops (v4T can't change the Thumb bit via
+ // a POP PC instruction), and only if we do not need to emit any SP update.
+ // Otherwise, we need a temporary register to pop the value
+ // and copy that value into LR.
+ auto MBBI = MBB.getFirstTerminator();
+ bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize;
+ if (CanRestoreDirectly) {
+ if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB)
+ CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET ||
+ MBBI->getOpcode() == ARM::tPOP_RET);
+ else {
+ auto MBBI_prev = MBBI;
+ MBBI_prev--;
+ assert(MBBI_prev->getOpcode() == ARM::tPOP);
+ assert(MBB.succ_size() == 1);
+ if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET)
+ MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET.
+ else
+ CanRestoreDirectly = false;
+ }
+ }
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+ if (CanRestoreDirectly) {
+ if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
+ return true;
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
+ // Copy implicit ops and popped registers, if any.
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
+ MIB.addOperand(MO);
+ MIB.addReg(ARM::PC, RegState::Define);
+ // Erase the old instruction (tBX_RET or tPOP).
+ MBB.erase(MBBI);
+ return true;
+ }
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(ARM::R3, RegState::Kill));
+ // Look for a temporary register to use.
+ // First, compute the liveness information.
+ LivePhysRegs UsedRegs(STI.getRegisterInfo());
+ UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true);
+ // The semantics of pristine registers changed recently: callee-saved
+ // registers that are touched in the function are no longer part of the
+ // pristine set.
+ // Add those callee-saved registers back explicitly.
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ UsedRegs.addReg(CSRegs[i]);
+
+ DebugLoc dl = DebugLoc();
+ if (MBBI != MBB.end()) {
+ dl = MBBI->getDebugLoc();
+ auto InstUpToMBBI = MBB.end();
+ while (InstUpToMBBI != MBBI)
+ // The pre-decrement is on purpose here.
+ // We want to have the liveness right before MBBI.
+ UsedRegs.stepBackward(*--InstUpToMBBI);
+ }
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::R3, RegState::Define)
- .addReg(ARM::R12, RegState::Kill));
- // Keep the tBX_RET instruction
+ // Look for a register that can be used directly in the POP.
+ unsigned PopReg = 0;
+ // And some temporary register, just in case.
+ unsigned TemporaryReg = 0;
+ BitVector PopFriendly =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID));
+ assert(PopFriendly.any() && "No allocatable pop-friendly register?!");
+ // Rebuild the GPRs from the high registers because they are removed
+ // from the GPR reg class for Thumb1.
+ BitVector GPRsNoLRSP =
+ TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID));
+ GPRsNoLRSP |= PopFriendly;
+ GPRsNoLRSP.reset(ARM::LR);
+ GPRsNoLRSP.reset(ARM::SP);
+ GPRsNoLRSP.reset(ARM::PC);
+ for (int Register = GPRsNoLRSP.find_first(); Register != -1;
+ Register = GPRsNoLRSP.find_next(Register)) {
+ if (!UsedRegs.contains(Register)) {
+ // Remember the first pop-friendly register and exit.
+ if (PopFriendly.test(Register)) {
+ PopReg = Register;
+ TemporaryReg = 0;
+ break;
+ }
+ // Otherwise, remember that the register will be available to
+ // save a pop-friendly register.
+ TemporaryReg = Register;
}
}
+
+ if (!DoIt && !PopReg && !TemporaryReg)
+ return false;
+
+ assert((PopReg || TemporaryReg) && "Cannot get LR");
+
+ if (TemporaryReg) {
+ assert(!PopReg && "Unnecessary MOV is about to be inserted");
+ PopReg = PopFriendly.find_first();
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+ }
+
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
+ // We couldn't use the direct restoration above, so
+ // perform the opposite conversion: tPOP_RET to tPOP.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
+ bool Popped = false;
+ for (auto MO: MBBI->operands())
+ if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
+ MO.getReg() != ARM::PC) {
+ MIB.addOperand(MO);
+ if (!MO.isImplicit())
+ Popped = true;
+ }
+ // Is there anything left to pop?
+ if (!Popped)
+ MBB.erase(MIB.getInstr());
+ // Erase the old instruction.
+ MBB.erase(MBBI);
+ MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
+ }
+
+ assert(PopReg && "Do not know how to get LR");
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ .addReg(PopReg, RegState::Define);
+
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
+
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill));
+
+ if (TemporaryReg)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill));
+
+ return true;
}
bool Thumb1FrameLowering::
@@ -461,8 +581,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
const TargetInstrInfo &TII = *STI.getInstrInfo();
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -501,31 +619,38 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *STI.getInstrInfo();
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
- bool NumRegs = false;
+ bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR) {
- // Special epilogue for vararg functions. See emitEpilogue
- if (isVarArg)
- continue;
- // ARMv4T requires BX, see emitEpilogue
- if (STI.hasV4TOps() && !STI.hasV5TOps())
+ if (MBB.succ_empty()) {
+ // Special epilogue for vararg functions. See emitEpilogue
+ if (isVarArg)
+ continue;
+ // ARMv4T requires BX, see emitEpilogue
+ if (!STI.hasV5TOps())
+ continue;
+ Reg = ARM::PC;
+ (*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ if (MI != MBB.end())
+ MIB.copyImplicitOps(&*MI);
+ MI = MBB.erase(MI);
+ } else
+ // LR may only be popped into PC, as part of a return sequence.
+ // If this isn't the return sequence, we'll need emitPopSpecialFixUp
+ // to restore LR the hard way.
continue;
- Reg = ARM::PC;
- (*MIB).setDesc(TII.get(ARM::tPOP_RET));
- MIB.copyImplicitOps(&*MI);
- MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
- NumRegs = true;
+ NeedsPop = true;
}
// It's illegal to emit pop instruction without operands.
- if (NumRegs)
+ if (NeedsPop)
MBB.insert(MI, &*MIB);
else
MF.DeleteMachineInstr(MIB);
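
The scratch-register search in emitPopSpecialFixUp above relies on LivePhysRegs: start from the block's live-outs (the patch additionally re-adds callee-saved registers explicitly) and step backwards to the insertion point. A compact sketch of that liveness query, assuming LLVM's LivePhysRegs API at this revision (isFreeBefore is an illustrative helper, not part of the patch):

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Target/TargetRegisterInfo.h"

// Returns true if Reg is not live immediately before MBBI. Liveness is
// computed from the block's live-outs by stepping backwards over every
// instruction from the end of the block up to (but not including) MBBI.
static bool isFreeBefore(unsigned Reg, llvm::MachineBasicBlock &MBB,
                         llvm::MachineBasicBlock::iterator MBBI,
                         const llvm::TargetRegisterInfo *TRI) {
  llvm::LivePhysRegs Used(TRI);
  Used.addLiveOuts(&MBB, /*AddPristines=*/true);
  for (auto I = MBB.end(); I != MBBI;)
    Used.stepBackward(*--I); // pre-decrement: liveness right before MBBI
  return !Used.contains(Reg);
}
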
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
index 31d5732..812f983 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -45,6 +45,42 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+
+ /// Check whether or not the given \p MBB can be used as an epilogue
+ /// for the target.
+ /// The epilogue will be inserted before the first terminator of that block.
+ /// This method is used by the shrink-wrapping pass to decide if
+ /// \p MBB will be correctly handled by the target.
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+
+private:
+ /// Check if the frame lowering of \p MF needs a special fixup
+ /// code sequence for the epilogue.
+ /// Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+ /// to LR, and we can't pop the value directly to the PC when
+ /// we need to update the SP after popping the value. So instead
+ /// we have to emit:
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// BX r3
+ /// If this would clobber a return value, then generate this sequence instead:
+ /// MOV ip, r3
+ /// POP {r3}
+ /// ADD sp, #offset
+ /// MOV lr, r3
+ /// MOV r3, ip
+ /// BX lr
+ bool needPopSpecialFixUp(const MachineFunction &MF) const;
+
+ /// Emit the special fixup code sequence for the epilogue.
+ /// \see needPopSpecialFixUp for more details.
+ /// \p DoIt tells this method whether or not to actually insert
+ /// the code sequence in \p MBB. I.e., when \p DoIt is false,
+ /// \p MBB is left untouched.
+ /// \returns For \p DoIt == true: true when the emission succeeded,
+ /// false otherwise. For \p DoIt == false: true when the emission
+ /// would have been possible, false otherwise.
+ bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 216e776..530e1d3 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -84,11 +84,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
@@ -112,11 +110,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 68736bc..bf0498d 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -256,8 +256,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
// Finalize the bundle.
- MachineBasicBlock::instr_iterator LI = LastITMI;
- finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI));
+ finalizeBundle(MBB, InsertPos.getInstrIterator(),
+ ++LastITMI->getIterator());
Modified = true;
++NumITs;
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index dc74f4e..4da769f 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -131,11 +131,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
@@ -171,11 +169,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index d9ab824..bcd0e57 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -125,7 +125,10 @@ namespace {
{ ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
- // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+ // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
+ // tSTMIA_UPD always writes back the base register, so it can only be used
+ // if the base register is killed. This difference is handled correctly elsewhere.
+ { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
{ ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};
@@ -210,12 +213,12 @@ Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
- assert(false && "Duplicated entries?");
+ llvm_unreachable("Duplicated entries?");
}
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
+ for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
@@ -435,6 +438,14 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
+ case ARM::t2STMIA: {
+ // If the base register is killed, we don't care what its value is after the
+ // instruction, so we can use an updating STMIA.
+ if (!MI->getOperand(0).isKill())
+ return false;
+
+ break;
+ }
case ARM::t2LDMIA_RET: {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
@@ -492,6 +503,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
+
+ // tSTMIA_UPD takes a defining register operand. We've already checked that
+ // the register is killed, so mark it as dead here.
+ if (Entry.WideOpc == ARM::t2STMIA)
+ MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
+
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -633,10 +650,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
- if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
- STI->avoidMOVsShifterOperand())
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
// Don't issue movs with shifter operand for some CPUs unless we
- // are optimizing / minimizing for size.
+ // are optimizing for size.
return false;
unsigned Reg0 = MI->getOperand(0).getReg();
@@ -660,11 +676,13 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
}
} else if (Reg0 != Reg1) {
// Try to commute the operands to make it a 2-address instruction.
- unsigned CommOpIdx1, CommOpIdx2;
+ unsigned CommOpIdx1 = 1;
+ unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
- CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ MI->getOperand(CommOpIdx2).getReg() != Reg0)
return false;
- MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ MachineInstr *CommutedMI =
+ TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2);
if (!CommutedMI)
return false;
}
@@ -750,10 +768,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
- if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs &&
- STI->avoidMOVsShifterOperand())
+ if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
// Don't issue movs with shifter operand for some CPUs unless we
- // are optimizing / minimizing for size.
+ // are optimizing for size.
return false;
unsigned Limit = ~0U;
@@ -1012,9 +1029,9 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());
- // Optimizing / minimizing size?
- OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
- MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
+ // Optimizing / minimizing size? Minimizing size implies optimizing for size.
+ OptimizeSize = MF.getFunction()->optForSize();
+ MinimizeSize = MF.getFunction()->optForMinSize();
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
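
ReduceTo2Addr above now uses the commuting interface introduced in this update: one operand index is pinned and TargetInstrInfo::CommuteAnyOperandIndex lets the target choose the other. A minimal sketch of that idiom, assuming the TargetInstrInfo API at this revision (commuteSoOperand1Holds is an illustrative helper, not part of the patch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"

// Try to commute MI so that operand 1 ends up holding Reg0. Operand index 1
// is fixed; the second index is left for the target to pick. Returns the
// commuted instruction, or nullptr if commuting is not possible.
static llvm::MachineInstr *
commuteSoOperand1Holds(const llvm::TargetInstrInfo *TII, llvm::MachineInstr *MI,
                       unsigned Reg0) {
  unsigned Idx1 = 1;
  unsigned Idx2 = llvm::TargetInstrInfo::CommuteAnyOperandIndex;
  if (!TII->findCommutedOpIndices(MI, Idx1, Idx2) ||
      MI->getOperand(Idx2).getReg() != Reg0)
    return nullptr;
  return TII->commuteInstruction(MI, /*NewMI=*/false, Idx1, Idx2);
}
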
diff --git a/contrib/llvm/lib/Target/AVR/AVR.td b/contrib/llvm/lib/Target/AVR/AVR.td
new file mode 100644
index 0000000..9e80717
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVR.td
@@ -0,0 +1,563 @@
+//===-- AVR.td - Describe the AVR Target Machine ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+// This is the top level entry point for the AVR target.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===---------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===---------------------------------------------------------------------===//
+// AVR Subtarget Features.
+//===---------------------------------------------------------------------===//
+
+// :TODO: Implement the skip errata, see `gcc/config/avr/avr-arch.h` for details
+// :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD.
+// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`.
+// avr2 (with SRAM) adds the rest of the variants.
+// :TODO: s/AVRTiny/Tiny
+
+
+// A feature set aggregates features, grouping them. We don't want to create a
+// new member in AVRSubtarget (to store a value) for each set because we do not
+// care if the set is supported, only the subfeatures inside the set. We fix
+// this by simply setting the same dummy member for all feature sets, which is
+// then ignored.
+class FeatureSet<string name, string desc, list<SubtargetFeature> i>
+ : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
+
+// A family of microcontrollers, defining a set of supported features.
+class Family<string name, list<SubtargetFeature> i>
+ : FeatureSet<name, !strconcat("The device is a part of the ",
+ name, " family"), i>;
+
+// The device has SRAM, and supports the bare minimum of
+// SRAM-relevant instructions.
+//
+// These are:
+// LD - all 9 variants
+// ST - all 9 variants
+// LDD - two variants for Y and Z
+// STD - two variants for Y and Z
+// `LDS Rd, K`
+// `STS k, Rr`
+// `PUSH`/`POP`
+def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
+ "The device has random access memory">;
+
+// The device supports the `JMP k` and `CALL k` instructions.
+def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
+ "The device supports the `JMP` and "
+ "`CALL` instructions">;
+
+
+// The device supports the indirect branches `IJMP` and `ICALL`.
+def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL",
+ "true",
+ "The device supports `IJMP`/`ICALL`"
+ "instructions">;
+
+// The device supports the extended indirect branches `EIJMP` and `EICALL`.
+def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL",
+ "true", "The device supports the "
+ "`EIJMP`/`EICALL` instructions">;
+
+// The device supports the 16-bit `ADIW Rd, K` and `SBIW Rd, K` instructions.
+def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW",
+ "true", "Enable 16-bit register-immediate "
+ "addition and subtraction instructions">;
+
+// The device has an 8-bit stack pointer (SP) register.
+def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack",
+ "true", "The device has an 8-bit "
+ "stack pointer">;
+
+// The device supports the 16-bit GPR pair MOVW instruction.
+def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
+ "The device supports the 16-bit MOVW "
+ "instruction">;
+
+// The device supports the `LPM` instruction, with implied destination being r0.
+def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
+ "The device supports the `LPM` instruction">;
+
+// The device supports the `LPM Rd, Z[+]` instruction.
+def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
+ "The device supports the `LPM Rd, Z[+]` "
+ "instruction">;
+
+// The device supports the `ELPM` instruction.
+def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
+ "The device supports the ELPM instruction">;
+
+// The device supports the `ELPM Rd, Z[+]` instructions.
+def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
+ "The device supports the `ELPM Rd, Z[+]` "
+ "instructions">;
+
+// The device supports the `SPM` instruction.
+def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
+ "The device supports the `SPM` instruction">;
+
+// The device supports the `SPM Z+` instruction.
+def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
+ "The device supports the `SPM Z+` "
+ "instruction">;
+
+// The device supports the `DES k` instruction.
+def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
+ "The device supports the `DES k` encryption "
+ "instruction">;
+
+// The device supports the Read-Write-Modify instructions
+// XCH, LAS, LAC, and LAT.
+def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
+ "The device supports the read-write-modify "
+ "instructions: XCH, LAS, LAC, LAT">;
+
+// The device supports the `[F]MUL[S][U]` family of instructions.
+def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication",
+ "true", "The device supports the "
+ "multiplication instructions">;
+
+// The device supports the `BREAK` instruction.
+def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
+ "The device supports the `BREAK` debugging "
+ "instruction">;
+
+// The device has instruction encodings specific to the Tiny core.
+def FeatureTinyEncoding : SubtargetFeature<"tinyencoding",
+ "m_hasTinyEncoding", "true",
+ "The device has Tiny core specific "
+ "instruction encodings">;
+
+class ELFArch<string name> : SubtargetFeature<"", "ELFArch",
+ !strconcat("ELF::",name), "">;
+
+// ELF e_flags architecture values
+def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
+def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
+def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
+def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
+def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
+def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
+def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
+def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
+def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
+def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
+def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
+def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
+def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
+def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
+def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
+def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
+def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
+def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
+
+//===---------------------------------------------------------------------===//
+// AVR Families
+//===---------------------------------------------------------------------===//
+
+// The device has at least the bare minimum that **every** single AVR
+// device should have.
+def FamilyAVR0 : Family<"avr0", []>;
+
+def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>;
+
+def FamilyAVR2 : Family<"avr2",
+ [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW,
+ FeatureSRAM]>;
+
+def FamilyAVR25 : Family<"avr25",
+ [FamilyAVR2, FeatureMOVW, FeatureLPMX,
+ FeatureSPM, FeatureBREAK]>;
+
+def FamilyAVR3 : Family<"avr3",
+ [FamilyAVR2, FeatureJMPCALL]>;
+
+def FamilyAVR31 : Family<"avr31",
+ [FamilyAVR3, FeatureELPM]>;
+
+def FamilyAVR35 : Family<"avr35",
+ [FamilyAVR3, FeatureMOVW, FeatureLPMX,
+ FeatureSPM, FeatureBREAK]>;
+
+def FamilyAVR4 : Family<"avr4",
+ [FamilyAVR2, FeatureMultiplication,
+ FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK]>;
+
+def FamilyAVR5 : Family<"avr5",
+ [FamilyAVR3, FeatureMultiplication,
+ FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK]>;
+
+def FamilyAVR51 : Family<"avr51",
+ [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
+
+def FamilyAVR6 : Family<"avr6",
+ [FamilyAVR51]>;
+
+def FamilyAVRTiny : Family<"avrtiny",
+ [FamilyAVR0, FeatureBREAK, FeatureSRAM,
+ FeatureTinyEncoding]>;
+
+def FamilyXMEGA : Family<"xmega",
+ [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX,
+ FeatureDES]>;
+
+def FamilyXMEGAU : Family<"xmegau",
+ [FamilyXMEGA, FeatureRMW]>;
+
+def FeatureSetSpecial : FeatureSet<"special",
+ "Enable use of the entire instruction "
+ "set - used for debugging",
+ [FeatureSRAM, FeatureJMPCALL,
+ FeatureIJMPCALL, FeatureEIJMPCALL,
+ FeatureADDSUBIW, FeatureMOVW,
+ FeatureLPM, FeatureLPMX, FeatureELPM,
+ FeatureELPMX, FeatureSPM, FeatureSPMX,
+ FeatureDES, FeatureRMW,
+ FeatureMultiplication, FeatureBREAK]>;
+
+//===---------------------------------------------------------------------===//
+// AVR microcontrollers supported.
+//===---------------------------------------------------------------------===//
+
+class Device<string Name, Family Fam, ELFArch Arch,
+ list<SubtargetFeature> ExtraFeatures = []>
+ : Processor<Name, NoItineraries, !listconcat([Fam,Arch],ExtraFeatures)>;
+
+// Generic MCUs
+// Note that several versions of GCC have strange ELF architecture
+// settings for backwards compatibility - see `gas/config/tc-avr.c`
+// in AVR binutils. We do not replicate this.
+def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
+def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
+def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
+def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
+def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
+def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
+def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
+def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
+def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
+def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
+def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>;
+
+// Specific MCUs
+def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
+def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
+def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
+def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25,
+ [FeatureMOVW, FeatureLPMX]>;
+def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
+def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
+def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
+def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
+def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
+def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
+def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
+def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8", FamilyAVR4, ELFArchAVR4>; // FIXME: family may be wrong
+def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX]>;
+def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega8e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
+def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>;
+
+//===---------------------------------------------------------------------===//
+// Register File Description
+//===---------------------------------------------------------------------===//
+
+include "AVRRegisterInfo.td"
+
+//===---------------------------------------------------------------------===//
+// Instruction Descriptions
+//===---------------------------------------------------------------------===//
+
+//include "AVRInstrInfo.td"
+
+//def AVRInstrInfo : InstrInfo;
+
+//===---------------------------------------------------------------------===//
+// Calling Conventions
+//===---------------------------------------------------------------------===//
+
+include "AVRCallingConv.td"
+
+//===---------------------------------------------------------------------===//
+// Assembly Printers
+//===---------------------------------------------------------------------===//
+
+// def AVRAsmWriter : AsmWriter {
+// string AsmWriterClassName = "InstPrinter";
+// bit isMCAsmWriter = 1;
+// }
+
+//===---------------------------------------------------------------------===//
+// Assembly Parsers
+//===---------------------------------------------------------------------===//
+
+// def AVRAsmParser : AsmParser {
+// let ShouldEmitMatchRegisterName = 1;
+// let ShouldEmitMatchRegisterAltName = 1;
+// }
+
+// def AVRAsmParserVariant : AsmParserVariant {
+// int Variant = 0;
+//
+// // Recognize hard coded registers.
+// string RegisterPrefix = "$";
+// }
+
+//===---------------------------------------------------------------------===//
+// Target Declaration
+//===---------------------------------------------------------------------===//
+
+def AVR : Target {
+// let InstructionSet = AVRInstrInfo;
+// let AssemblyWriters = [AVRAsmWriter];
+//
+// let AssemblyParsers = [AVRAsmParser];
+// let AssemblyParserVariants = [AVRAsmParserVariant];
+}
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRCallingConv.td b/contrib/llvm/lib/Target/AVR/AVRCallingConv.td
new file mode 100644
index 0000000..d8cb3fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -0,0 +1,65 @@
+//===-- AVRCallingConv.td - Calling Conventions for AVR ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for the AVR architecture.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AVR Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+def RetCC_AVR : CallingConv
+<[
+ // i8 is returned in R24.
+ CCIfType<[i8], CCAssignToReg<[R24]>>,
+
+ // i16 values are returned in R25:R24, R23:R22, R21:R20, and R19:R18.
+ CCIfType<[i16], CCAssignToReg<[R25R24, R23R22, R21R20, R19R18]>>
+]>;
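As a side note on the pairing above: under avr-gcc's convention (which this backend follows, as far as I can tell) the low byte of a 16-bit result lives in the lower-numbered register of the pair. A standalone C++ sketch of that split, with an arbitrary example value rather than anything taken from the backend:

    // Minimal sketch: how a 16-bit return value maps onto the R25:R24 pair.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint16_t Ret = 0xBEEF;
      uint8_t R24 = Ret & 0xFF;        // low byte in the lower-numbered register
      uint8_t R25 = (Ret >> 8) & 0xFF; // high byte in the upper register of the pair
      std::printf("R25:R24 = %02X:%02X\n", R25, R24);
      return 0;
    }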
+
+// Special return value calling convention for runtime functions.
+def RetCC_AVR_RT : CallingConv
+<[
+ CCIfType<[i8], CCAssignToReg<[R24,R25]>>,
+ CCIfType<[i16], CCAssignToReg<[R23R22, R25R24]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AVR Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// The calling conventions are implemented in custom C++ code
+
+// Calling convention for variadic functions.
+def ArgCC_AVR_Vararg : CallingConv
+<[
+ // i16 values are always passed on the stack with an alignment of 1.
+ CCAssignToStack<2, 1>
+]>;
+
+// Special argument calling convention for
+// multiplication runtime functions.
+def ArgCC_AVR_RT_MUL : CallingConv
+<[
+ CCIfType<[i16], CCAssignToReg<[R27R26,R19R18]>>
+]>;
+
+// Special argument calling convention for
+// division runtime functions.
+def ArgCC_AVR_RT_DIV : CallingConv
+<[
+ CCIfType<[i8], CCAssignToReg<[R24,R22]>>,
+ CCIfType<[i16], CCAssignToReg<[R25R24, R23R22]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
+def CSR_Interrupts : CalleeSavedRegs<(add (sequence "R%u", 31, 0))>;
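The `(sequence "R%u", 17, 2)` operand should expand to R17 down to R2, so CSR_Normal covers R29, R28 and R17-R2, while CSR_Interrupts covers every GPR. A quick plain-C++ sketch of that expansion (the downward counting is my reading of the TableGen sequence operator, not generated output):

    // Prints the register list (sequence "R%u", 17, 2) is expected to expand
    // to inside CSR_Normal.
    #include <cstdio>

    int main() {
      std::printf("CSR_Normal: R29 R28");
      for (int i = 17; i >= 2; --i)
        std::printf(" R%d", i);
      std::printf("\n");
      return 0;
    }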
diff --git a/contrib/llvm/lib/Target/AVR/AVRConfig.h b/contrib/llvm/lib/Target/AVR/AVRConfig.h
new file mode 100644
index 0000000..65588bc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRConfig.h
@@ -0,0 +1,15 @@
+//===-- AVRConfig.h - AVR Backend Configuration Header ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_CONFIG_H
+#define LLVM_AVR_CONFIG_H
+
+#define LLVM_AVR_GCC_COMPAT
+
+#endif // LLVM_AVR_CONFIG_H
diff --git a/contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
new file mode 100644
index 0000000..6571d5d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -0,0 +1,73 @@
+//===-- AVRMachineFunctionInfo.h - AVR machine function info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AVR-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_MACHINE_FUNCTION_INFO_H
+#define LLVM_AVR_MACHINE_FUNCTION_INFO_H
+
+#include "AVRConfig.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/**
+ * Contains AVR-specific information for each MachineFunction.
+ */
+class AVRMachineFunctionInfo : public MachineFunctionInfo {
+ /// Indicates if a register has been spilled by the register
+ /// allocator.
+ bool HasSpills;
+
+ /// Indicates if there are any fixed size allocas present.
+ /// Note that if there are only variable sized allocas this is set to false.
+ bool HasAllocas;
+
+ /// Indicates if arguments passed using the stack are being
+ /// used inside the function.
+ bool HasStackArgs;
+
+ /// Size of the callee-saved register portion of the
+ /// stack frame in bytes.
+ unsigned CalleeSavedFrameSize;
+
+ /// FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+public:
+ AVRMachineFunctionInfo()
+ : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+ CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+ explicit AVRMachineFunctionInfo(MachineFunction &MF)
+ : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+ CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+ bool getHasSpills() const { return HasSpills; }
+ void setHasSpills(bool B) { HasSpills = B; }
+
+ bool getHasAllocas() const { return HasAllocas; }
+ void setHasAllocas(bool B) { HasAllocas = B; }
+
+ bool getHasStackArgs() const { return HasStackArgs; }
+ void setHasStackArgs(bool B) { HasStackArgs = B; }
+
+ unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+ void setCalleeSavedFrameSize(unsigned Bytes) { CalleeSavedFrameSize = Bytes; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+};
+
+} // end llvm namespace
+
+#endif // LLVM_AVR_MACHINE_FUNCTION_INFO_H
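For context, per-function info objects like this are normally fetched through MachineFunction::getInfo. A plausible usage sketch only; the helper function and its parameters are hypothetical, and just getInfo<>() and the accessors above are assumed to exist:

    // Hypothetical helper showing how the backend might record frame facts.
    #include "AVRMachineFunctionInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"
    using namespace llvm;

    static void recordFrameFacts(MachineFunction &MF, unsigned CSRSizeInBytes) {
      AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
      AFI->setHasSpills(true);                  // e.g. the register allocator spilled
      AFI->setCalleeSavedFrameSize(CSRSizeInBytes);
    }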
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
new file mode 100644
index 0000000..32650fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -0,0 +1,216 @@
+//===-- AVRRegisterInfo.td - AVR Register defs -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the AVR register file
+//===----------------------------------------------------------------------===//
+
+// 8-bit General purpose register definition.
+class AVRReg<bits<16> num,
+ string name,
+ list<Register> subregs = [],
+ list<string> altNames = []>
+ : RegisterWithSubRegs<name, subregs>
+{
+ field bits<16> Num = num;
+
+ let HWEncoding = num;
+ let Namespace = "AVR";
+ let SubRegs = subregs;
+ let AltNames = altNames;
+}
+
+// Subregister indices.
+let Namespace = "AVR" in
+{
+ def sub_lo : SubRegIndex<8>;
+ def sub_hi : SubRegIndex<8, 8>;
+}
+
+let Namespace = "AVR" in {
+ def ptr : RegAltNameIndex;
+}
+
+
+//===----------------------------------------------------------------------===//
+// 8-bit general purpose registers
+//===----------------------------------------------------------------------===//
+
+def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>;
+def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>;
+def R10 : AVRReg<10, "r10">, DwarfRegNum<[10]>;
+def R11 : AVRReg<11, "r11">, DwarfRegNum<[11]>;
+def R12 : AVRReg<12, "r12">, DwarfRegNum<[12]>;
+def R13 : AVRReg<13, "r13">, DwarfRegNum<[13]>;
+def R14 : AVRReg<14, "r14">, DwarfRegNum<[14]>;
+def R15 : AVRReg<15, "r15">, DwarfRegNum<[15]>;
+def R16 : AVRReg<16, "r16">, DwarfRegNum<[16]>;
+def R17 : AVRReg<17, "r17">, DwarfRegNum<[17]>;
+def R18 : AVRReg<18, "r18">, DwarfRegNum<[18]>;
+def R19 : AVRReg<19, "r19">, DwarfRegNum<[19]>;
+def R20 : AVRReg<20, "r20">, DwarfRegNum<[20]>;
+def R21 : AVRReg<21, "r21">, DwarfRegNum<[21]>;
+def R22 : AVRReg<22, "r22">, DwarfRegNum<[22]>;
+def R23 : AVRReg<23, "r23">, DwarfRegNum<[23]>;
+def R24 : AVRReg<24, "r24">, DwarfRegNum<[24]>;
+def R25 : AVRReg<25, "r25">, DwarfRegNum<[25]>;
+def R26 : AVRReg<26, "r26">, DwarfRegNum<[26]>;
+def R27 : AVRReg<27, "r27">, DwarfRegNum<[27]>;
+def R28 : AVRReg<28, "r28">, DwarfRegNum<[28]>;
+def R29 : AVRReg<29, "r29">, DwarfRegNum<[29]>;
+def R30 : AVRReg<30, "r30">, DwarfRegNum<[30]>;
+def R31 : AVRReg<31, "r31">, DwarfRegNum<[31]>;
+def SPL : AVRReg<32, "SPL">, DwarfRegNum<[32]>;
+def SPH : AVRReg<33, "SPH">, DwarfRegNum<[33]>;
+
+let SubRegIndices = [sub_lo, sub_hi],
+CoveredBySubRegs = 1 in
+{
+ // 16 bit GPR pairs.
+ def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>;
+
+ // The pointer registers (X,Y,Z) are a special case because they
+ // are printed as a `high:low` pair when a DREG is expected,
+ // but printed using `X`, `Y`, `Z` when a pointer register is expected.
+ let RegAltNameIndices = [ptr] in {
+ def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>;
+ def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>;
+ def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>;
+ }
+ def R25R24 : AVRReg<24, "r25:r24", [R24, R25]>, DwarfRegNum<[24]>;
+ def R23R22 : AVRReg<22, "r23:r22", [R22, R23]>, DwarfRegNum<[22]>;
+ def R21R20 : AVRReg<20, "r21:r20", [R20, R21]>, DwarfRegNum<[20]>;
+ def R19R18 : AVRReg<18, "r19:r18", [R18, R19]>, DwarfRegNum<[18]>;
+ def R17R16 : AVRReg<16, "r17:r16", [R16, R17]>, DwarfRegNum<[16]>;
+ def R15R14 : AVRReg<14, "r15:r14", [R14, R15]>, DwarfRegNum<[14]>;
+ def R13R12 : AVRReg<12, "r13:r12", [R12, R13]>, DwarfRegNum<[12]>;
+ def R11R10 : AVRReg<10, "r11:r10", [R10, R11]>, DwarfRegNum<[10]>;
+ def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>;
+ def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>;
+ def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>;
+ def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>;
+ def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+//:TODO: use proper set instructions instead of always using "add"
+
+// Main 8-bit register class.
+def GPR8 : RegisterClass<"AVR", [i8], 8,
+ (
+ // Return value and argument registers.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
+ R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
+ )>;
+
+// Simple lower registers r0..r15
+def GPR8lo : RegisterClass<"AVR", [i8], 8,
+ (
+ add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
+ )>;
+
+// 8-bit register class for instructions which take immediates.
+def LD8 : RegisterClass<"AVR", [i8], 8,
+ (
+ // Return value and arguments.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16
+ )>;
+
+// Simple lower registers r16..r23
+def LD8lo : RegisterClass<"AVR", [i8], 8,
+ (
+ add R23, R22, R21, R20, R19, R18, R17, R16
+ )>;
+
+// Main 16-bit pair register class.
+def DREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10,
+ R9R8, R7R6, R5R4, R3R2, R1R0
+ )>;
+
+// 16-bit register class for immediate instructions.
+def DLDREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16
+ )>;
+
+// 16-bit register class for the adiw/sbiw instructions.
+def IWREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28
+ )>;
+
+// 16-bit register class for the ld and st instructions.
+// AKA X, Y, and Z.
+def PTRREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ add R27R26, // X
+ R29R28, // Y
+ R31R30 // Z
+ ), ptr>;
+
+// 16-bit register class for the ldd and std instructions.
+// AKA Y and Z.
+def PTRDISPREGS : RegisterClass<"AVR", [i16], 8,
+ (
+ add R31R30, R29R28
+ ), ptr>;
+
+// We have a bunch of instructions with an explicit Z register argument. We
+// model this using a register class containing only the Z register.
+// :TODO: Rename to 'ZREG'.
+def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
+
+// Register class used for the stack read pseudo instruction.
+def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>;
+
+//:TODO: if we remove this we get an error in tablegen
+//:TODO: this is just a hack, remove it once add16 works!
+// Status register.
+def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>;
+def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)>
+{
+ let CopyCost = -1; // Don't allow copying of status registers
+}
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
new file mode 100644
index 0000000..a91dce8
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -0,0 +1,4 @@
+
+extern "C" void LLVMInitializeAVRTarget() {
+
+}
diff --git a/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
new file mode 100644
index 0000000..c0e0d20
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AVRTargetInfo.cpp - AVR Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+Target TheAVRTarget;
+}
+
+extern "C" void LLVMInitializeAVRTargetInfo() {
+ llvm::RegisterTarget<llvm::Triple::avr> X(
+ llvm::TheAVRTarget, "avr", "Atmel AVR Microcontroller");
+}
+
+// FIXME: Temporary stub - this function must be defined for linking
+// to succeed. Remove once this function is properly implemented.
+extern "C" void LLVMInitializeAVRTargetMC() {
+}
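Once TheAVRTarget is registered, tools locate it through the registry by triple or name. A minimal lookup sketch: TargetRegistry::lookupTarget is existing LLVM API, but the triple string and the wrapper function here are illustrative only:

    // Hypothetical lookup of the target registered above.
    #include "llvm/Support/TargetRegistry.h"
    #include <string>

    static const llvm::Target *findAVRTarget() {
      std::string Error;
      return llvm::TargetRegistry::lookupTarget("avr-unknown-unknown", Error);
    }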
diff --git a/contrib/llvm/lib/Target/BPF/BPF.td b/contrib/llvm/lib/Target/BPF/BPF.td
index a4ce90a..8493b0f 100644
--- a/contrib/llvm/lib/Target/BPF/BPF.td
+++ b/contrib/llvm/lib/Target/BPF/BPF.td
@@ -25,7 +25,14 @@ def BPFInstPrinter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def BPFAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "BPF";
+ string BreakCharacters = ".";
+}
+
def BPF : Target {
let InstructionSet = BPFInstrInfo;
let AssemblyWriters = [BPFInstPrinter];
+ let AssemblyParserVariants = [BPFAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
index 7341828..6a5b37e 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -547,8 +547,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
+ MachineFunction::iterator I = ++BB->getIterator();
// ThisMBB:
// ...
diff --git a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
index adcaff6..4276d08 100644
--- a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -17,8 +17,6 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
-class MCOperand;
-
class BPFInstPrinter : public MCInstPrinter {
public:
BPFInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 36f9926..8c358ca 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -68,16 +68,23 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
assert(Value == 0);
- return;
- }
- assert(Fixup.getKind() == FK_PCRel_2);
- Value = (uint16_t)((Value - 8) / 8);
- if (IsLittleEndian) {
- Data[Fixup.getOffset() + 2] = Value & 0xFF;
- Data[Fixup.getOffset() + 3] = Value >> 8;
+ } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
+ unsigned Size = Fixup.getKind() == FK_Data_4 ? 4 : 8;
+
+ for (unsigned i = 0; i != Size; ++i) {
+ unsigned Idx = IsLittleEndian ? i : Size - i;
+ Data[Fixup.getOffset() + Idx] = uint8_t(Value >> (i * 8));
+ }
} else {
- Data[Fixup.getOffset() + 2] = Value >> 8;
- Data[Fixup.getOffset() + 3] = Value & 0xFF;
+ assert(Fixup.getKind() == FK_PCRel_2);
+ Value = (uint16_t)((Value - 8) / 8);
+ if (IsLittleEndian) {
+ Data[Fixup.getOffset() + 2] = Value & 0xFF;
+ Data[Fixup.getOffset() + 3] = Value >> 8;
+ } else {
+ Data[Fixup.getOffset() + 2] = Value >> 8;
+ Data[Fixup.getOffset() + 3] = Value & 0xFF;
+ }
}
}
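The new FK_Data_4/FK_Data_8 branch patches the fixup value into the fragment one byte at a time. A standalone sketch of the little-endian case; buffer, offset, and value are made-up examples rather than anything from the backend:

    // Writes an 8-byte value least-significant byte first, mirroring the
    // little-endian path of the FK_Data_* branch above.
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint8_t Data[16] = {0};
      const unsigned Offset = 4, Size = 8;          // an FK_Data_8-sized fixup
      uint64_t Value = 0x1122334455667788ULL;
      for (unsigned i = 0; i != Size; ++i)
        Data[Offset + i] = uint8_t(Value >> (i * 8));
      for (unsigned i = 0; i != Size; ++i)
        std::printf("%02X ", Data[Offset + i]);     // 88 77 66 55 44 33 22 11
      std::printf("\n");
      return 0;
    }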
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 05ba618..87cdd5e 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -44,6 +44,10 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_X86_64_64;
case FK_SecRel_4:
return ELF::R_X86_64_PC32;
+ case FK_Data_8:
+ return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
+ case FK_Data_4:
+ return IsPCRel ? ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
}
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index d63bbf4..1f440fe 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -34,6 +34,8 @@ public:
UsesELFSectionDirectiveForBSS = true;
HasSingleParameterDotFile = false;
HasDotTypeDotSizeDirective = false;
+
+ SupportsDebugInformation = true;
}
};
}
diff --git a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
index 272688e..5ea6551 100644
--- a/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
+++ b/contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp
@@ -551,7 +551,8 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
void CppWriter::printType(Type* Ty) {
// We don't print definitions for primitive types
if (Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() ||
- Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy())
+ Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy() ||
+ Ty->isTokenTy())
return;
// If we already defined this type, we don't need to define it again.
@@ -1355,23 +1356,18 @@ void CppWriter::printInstruction(const Instruction *I,
}
case Instruction::GetElementPtr: {
const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
- if (gep->getNumOperands() <= 2) {
- Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
- << opNames[0];
- if (gep->getNumOperands() == 2)
- Out << ", " << opNames[1];
- } else {
- Out << "std::vector<Value*> " << iName << "_indices;";
- nl(Out);
- for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
- Out << iName << "_indices.push_back("
- << opNames[i] << ");";
- nl(Out);
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << getCppName(gep->getSourceElementType()) << ", " << opNames[0] << ", {";
+ in();
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ if (i != 1) {
+ Out << ", ";
}
- Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
- << opNames[0] << ", " << iName << "_indices";
+ nl(Out);
+ Out << opNames[i];
}
- Out << ", \"";
+ out();
+ nl(Out) << "}, \"";
printEscapedString(gep->getName());
Out << "\", " << bbname << ");";
break;
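With this change the printer emits the newer GetElementPtrInst::Create overload that takes the source element type plus a braced index list. A compile-only sketch of that call shape against the LLVM C++ API; the wrapper and its parameter names are placeholders for whatever names the generated code actually defines:

    // Shape of the call the updated printer now produces.
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static Value *buildGEP(Type *SrcElemTy, Value *Base, Value *Idx0,
                           Value *Idx1, BasicBlock *BB) {
      return GetElementPtrInst::Create(SrcElemTy, Base, {Idx0, Idx1}, "gep", BB);
    }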
@@ -1803,13 +1799,12 @@ void CppWriter::printFunctionBody(const Function *F) {
<< "->arg_begin();";
nl(Out);
}
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- Out << "Value* " << getCppName(AI) << " = args++;";
+ for (const Argument &AI : F->args()) {
+ Out << "Value* " << getCppName(&AI) << " = args++;";
nl(Out);
- if (AI->hasName()) {
- Out << getCppName(AI) << "->setName(\"";
- printEscapedString(AI->getName());
+ if (AI.hasName()) {
+ Out << getCppName(&AI) << "->setName(\"";
+ printEscapedString(AI.getName());
Out << "\");";
nl(Out);
}
@@ -1818,29 +1813,25 @@ void CppWriter::printFunctionBody(const Function *F) {
// Create all the basic blocks
nl(Out);
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
+ for (const BasicBlock &BI : *F) {
+ std::string bbname(getCppName(&BI));
Out << "BasicBlock* " << bbname <<
" = BasicBlock::Create(mod->getContext(), \"";
- if (BI->hasName())
- printEscapedString(BI->getName());
- Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ if (BI.hasName())
+ printEscapedString(BI.getName());
+ Out << "\"," << getCppName(BI.getParent()) << ",0);";
nl(Out);
}
// Output all of its basic blocks... for the function
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
- nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ for (const BasicBlock &BI : *F) {
+ std::string bbname(getCppName(&BI));
+ nl(Out) << "// Block " << BI.getName() << " (" << bbname << ")";
nl(Out);
// Output all of the instructions in the basic block...
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
- I != E; ++I) {
- printInstruction(I,bbname);
- }
+ for (const Instruction &I : BI)
+ printInstruction(&I, bbname);
}
// Loop over the ForwardRefs and resolve them now that all instructions
@@ -1883,7 +1874,7 @@ void CppWriter::printInline(const std::string& fname,
printFunctionUses(F);
printFunctionBody(F);
is_inline = false;
- Out << "return " << getCppName(F->begin()) << ";";
+ Out << "return " << getCppName(&F->front()) << ";";
nl(Out) << "}";
nl(Out);
}
@@ -1896,17 +1887,14 @@ void CppWriter::printModuleBody() {
// Functions can call each other and global variables can reference them so
// define all the functions first before emitting their function bodies.
nl(Out) << "// Function Declarations"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I)
- printFunctionHead(I);
+ for (const Function &I : *TheModule)
+ printFunctionHead(&I);
 // Process the global variable declarations. We can't initialize them until
 // after the constants are printed, so just print a header for each global.
nl(Out) << "// Global Variable Declarations\n"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableHead(I);
- }
+ for (const GlobalVariable &I : TheModule->globals())
+ printVariableHead(&I);
// Print out all the constants definitions. Constants don't recurse except
// through GlobalValues. All GlobalValues have been declared at this point
@@ -1918,21 +1906,18 @@ void CppWriter::printModuleBody() {
// been emitted. These definitions just couple the gvars with their constant
// initializers.
nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableBody(I);
- }
+ for (const GlobalVariable &I : TheModule->globals())
+ printVariableBody(&I);
// Finally, we can safely put out all of the function bodies.
nl(Out) << "// Function Definitions"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I) {
- if (!I->isDeclaration()) {
- nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ for (const Function &I : *TheModule) {
+ if (!I.isDeclaration()) {
+ nl(Out) << "// Function: " << I.getName() << " (" << getCppName(&I)
<< ")";
nl(Out) << "{";
nl(Out,1);
- printFunctionBody(I);
+ printFunctionBody(&I);
nl(Out,-1) << "}";
nl(Out);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
new file mode 100644
index 0000000..a8622a9
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -0,0 +1,2152 @@
+//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcasmparser"
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetStreamer.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
+#include "MCTargetDesc/HexagonMCExpr.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCAsmInfo.h"
+#include "MCTargetDesc/HexagonShuffler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+using namespace llvm;
+
+static cl::opt<bool> EnableFutureRegs("mfuture-regs",
+ cl::desc("Enable future registers"));
+
+static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
+cl::desc("Warn for missing parentheses around predicate registers"),
+cl::init(true));
+static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
+cl::desc("Error for missing parentheses around predicate registers"),
+cl::init(false));
+static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
+cl::desc("Warn for mismatching a signed and unsigned value"),
+cl::init(true));
+static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
+cl::desc("Warn for register names that aren't contiguous"),
+cl::init(true));
+static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
+cl::desc("Error for register names that aren't contiguous"),
+cl::init(false));
+
+
+namespace {
+struct HexagonOperand;
+
+class HexagonAsmParser : public MCTargetAsmParser {
+
+ HexagonTargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
+ return static_cast<HexagonTargetStreamer &>(TS);
+ }
+
+ MCAsmParser &Parser;
+ MCAssembler *Assembler;
+ MCInstrInfo const &MCII;
+ MCInst MCB;
+ bool InBrackets;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAssembler *getAssembler() const { return Assembler; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ bool equalIsAsmAssignment() override { return false; }
+ bool isLabel(AsmToken &Token) override;
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ bool ParseDirectiveFalign(unsigned Size, SMLoc L);
+
+ virtual bool ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc,
+ SMLoc &EndLoc) override;
+ bool ParseDirectiveSubsection(SMLoc L);
+ bool ParseDirectiveValue(unsigned Size, SMLoc L);
+ bool ParseDirectiveComm(bool IsLocal, SMLoc L);
+ bool RegisterMatchesArch(unsigned MatchNum) const;
+
+ bool matchBundleOptions();
+ bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc);
+ bool finishBundle(SMLoc IDLoc, MCStreamer &Out);
+ void canonicalizeImmediates(MCInst &MCI);
+ bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc,
+ OperandVector &InstOperands, uint64_t &ErrorInfo,
+ bool MatchingInlineAsm, bool &MustExtend);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
+ void OutOfRange(SMLoc IDLoc, long long Val, long long Max);
+ int processInstruction(MCInst &Inst, OperandVector const &Operands,
+ SMLoc IDLoc, bool &MustExtend);
+
+ // Check if we have an assembler and, if so, set the ELF e_header flags.
+ void chksetELFHeaderEFlags(unsigned flags) {
+ if (getAssembler())
+ getAssembler()->setELFHeaderEFlags(flags);
+ }
+
+/// @name Auto-generated Match Functions
+/// {
+
+#define GET_ASSEMBLER_HEADER
+#include "HexagonGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, _STI), Parser(_Parser),
+ MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) {
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
+ MCAsmParserExtension::Initialize(_Parser);
+
+ Assembler = nullptr;
+ // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+ if (!Parser.getStreamer().hasRawTextSupport()) {
+ MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+ Assembler = &MES->getAssembler();
+ }
+ }
+
+ bool mustExtend(OperandVector &Operands);
+ bool splitIdentifier(OperandVector &Operands);
+ bool parseOperand(OperandVector &Operands);
+ bool parseInstruction(OperandVector &Operands);
+ bool implicitExpressionLocation(OperandVector &Operands);
+ bool parseExpressionOrOperand(OperandVector &Operands);
+ bool parseExpression(MCExpr const *& Expr);
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override
+ {
+ llvm_unreachable("Unimplemented");
+ }
+ virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ AsmToken ID, OperandVector &Operands) override;
+
+ virtual bool ParseDirective(AsmToken DirectiveID) override;
+};
+
+/// HexagonOperand - Instances of this class represent a parsed Hexagon machine
+/// instruction.
+struct HexagonOperand : public MCParsedAsmOperand {
+ enum KindTy { Token, Immediate, Register } Kind;
+
+ SMLoc StartLoc, EndLoc;
+
+ struct TokTy {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct RegTy {
+ unsigned RegNum;
+ };
+
+ struct ImmTy {
+ const MCExpr *Val;
+ bool MustExtend;
+ };
+
+ struct InstTy {
+ OperandVector *SubInsts;
+ };
+
+ union {
+ struct TokTy Tok;
+ struct RegTy Reg;
+ struct ImmTy Imm;
+ };
+
+ HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
+public:
+ HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case Register:
+ Reg = o.Reg;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Token:
+ Tok = o.Tok;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const { return EndLoc; }
+
+ unsigned getReg() const {
+ assert(Kind == Register && "Invalid access!");
+ return Reg.RegNum;
+ }
+
+ const MCExpr *getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ bool isToken() const { return Kind == Token; }
+ bool isImm() const { return Kind == Immediate; }
+ bool isMem() const { llvm_unreachable("No isMem"); }
+ bool isReg() const { return Kind == Register; }
+
+ bool CheckImmRange(int immBits, int zeroBits, bool isSigned,
+ bool isRelocatable, bool Extendable) const {
+ if (Kind == Immediate) {
+ const MCExpr *myMCExpr = getImm();
+ if (Imm.MustExtend && !Extendable)
+ return false;
+ int64_t Res;
+ if (myMCExpr->evaluateAsAbsolute(Res)) {
+ int bits = immBits + zeroBits;
+ // The field spans immBits + zeroBits bits in total;
+ // the low zeroBits bits of the value must be zero.
+ if (Res & ((1 << zeroBits) - 1))
+ return false;
+ if (isSigned) {
+ if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1)))
+ return true;
+ } else {
+ if (bits == 64)
+ return true;
+ if (Res >= 0)
+ return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false;
+ else {
+ const int64_t high_bit_set = 1ULL << 63;
+ const uint64_t mask = (high_bit_set >> (63 - bits));
+ return (((uint64_t)Res & mask) == mask) ? true : false;
+ }
+ }
+ } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable)
+ return true;
+ else if (myMCExpr->getKind() == MCExpr::Binary ||
+ myMCExpr->getKind() == MCExpr::Unary)
+ return true;
+ }
+ return false;
+ }
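The core of CheckImmRange is an alignment test on the low zeroBits bits followed by a signed or unsigned fit test over immBits + zeroBits bits. A simplified standalone sketch of just that arithmetic; it deliberately omits the 64-bit, relocatable, and must-extend cases handled above, and the sample values are arbitrary:

    #include <cstdint>
    #include <cstdio>

    // Returns true when Res fits an immBits-bit field scaled by 2^zeroBits.
    static bool fits(int64_t Res, int immBits, int zeroBits, bool isSigned) {
      int bits = immBits + zeroBits;
      if (Res & ((1 << zeroBits) - 1))        // scaled operands need aligned values
        return false;
      if (isSigned)
        return Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1));
      return Res >= 0 && (uint64_t)Res < (1ULL << bits);
    }

    int main() {
      std::printf("%d\n", fits(28, 4, 2, true)); // s4_2-style: 28 = 7 << 2, in range
      std::printf("%d\n", fits(30, 4, 2, true)); // fails: low bits not clear
      return 0;
    }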
+
+ bool isf32Ext() const { return false; }
+ bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); }
+ bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); }
+ bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); }
+ bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); }
+ bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); }
+ bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); }
+ bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); }
+ bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); }
+ bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); }
+ bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); }
+
+ bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); }
+ bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); }
+ bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); }
+ bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); }
+ bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); }
+ bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); }
+ bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); }
+ bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); }
+ bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); }
+ bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
+ bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
+ bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
+ bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); }
+ bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); }
+ bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); }
+ bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); }
+ bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); }
+ bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); }
+ bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); }
+ bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); }
+ bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); }
+
+ bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); }
+
+ bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
+ bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
+ bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
+ bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
+ bool iss8Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
+ bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
+ bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
+ bool iss11_0Ext() const {
+ return CheckImmRange(11 + 26, 0, true, true, true);
+ }
+ bool iss11_1Ext() const {
+ return CheckImmRange(11 + 26, 1, true, true, true);
+ }
+ bool iss11_2Ext() const {
+ return CheckImmRange(11 + 26, 2, true, true, true);
+ }
+ bool iss11_3Ext() const {
+ return CheckImmRange(11 + 26, 3, true, true, true);
+ }
+
+ bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
+ bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
+ bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
+ bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
+ bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
+ bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
+ bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); }
+ bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); }
+ bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); }
+ bool isu32MustExt() const { return isImm() && Imm.MustExtend; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createExpr(getImm()));
+ }
+
+ void addSignedImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ MCExpr const *Expr = getImm();
+ int64_t Value;
+ if (!Expr->evaluateAsAbsolute(Value)) {
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ return;
+ }
+ int64_t Extended = SignExtend64 (Value, 32);
+ if ((Extended < 0) == (Value < 0)) {
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ return;
+ }
+ // Flip bit 33 to signal a signed/unsigned mismatch.
+ Extended ^= 0x100000000;
+ Inst.addOperand(MCOperand::createImm(Extended));
+ }
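The flipped bit above is a marker rather than part of the value: when sign-extending the low 32 bits changes the sign relative to the original expression, the 2^32 bit (called "bit 33" in the comments) is toggled so canonicalizeImmediates further down can spot the mismatch and warn. A standalone sketch of the encode side, with an arbitrary sample literal:

    // Mirrors the mismatch check above: sign-extend the low 32 bits and, if
    // the sign flips, toggle the 2^32 flag bit for the canonicalizer to find.
    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Value = 0xFFFFFFFF;                 // written as an unsigned literal
      int64_t Extended = (int64_t)(int32_t)Value; // SignExtend64(Value, 32) == -1
      if ((Extended < 0) != (Value < 0)) {
        Extended ^= 0x100000000;                  // toggle the flag bit
        std::printf("flagged imm: 0x%llx\n", (unsigned long long)Extended);
      }
      return 0;
    }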
+
+ void addf32ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds32ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8Imm64Operands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds6ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds4_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds3ImmOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+
+ void addu64ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu32ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu26_6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu16_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu11_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu10ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu9ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu8ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu7ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_0ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu5ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu4ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu3ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu2ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu1ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void addm6ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addn8ImmOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds16ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds12ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds10ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds9ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds8ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds6ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_0ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_1ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_2ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+ void adds11_3ExtOperands(MCInst &Inst, unsigned N) const {
+ addSignedImmOperands(Inst, N);
+ }
+
+ void addu6ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu7ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu8ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu9ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu10ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_0ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_1ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+ void addu32MustExtOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
+ }
+
+ void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+ }
+
+ void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ virtual void print(raw_ostream &OS) const;
+
+ static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) {
+ HexagonOperand *Op = new HexagonOperand(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+
+ static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ HexagonOperand *Op = new HexagonOperand(Register);
+ Op->Reg.RegNum = RegNum;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+
+ static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ HexagonOperand *Op = new HexagonOperand(Immediate);
+ Op->Imm.Val = Val;
+ Op->Imm.MustExtend = false;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return std::unique_ptr<HexagonOperand>(Op);
+ }
+};
+
+} // end anonymous namespace.
+
+void HexagonOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Immediate:
+ getImm()->print(OS, nullptr);
+ break;
+ case Register:
+ OS << "<register R";
+ OS << getReg() << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+static unsigned MatchRegisterName(StringRef Name);
+
+bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
+ DEBUG(dbgs() << "Bundle:");
+ DEBUG(MCB.dump_pretty(dbgs()));
+ DEBUG(dbgs() << "--\n");
+
+ // Check the bundle for errors.
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI);
+
+ bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(),
+ getContext(), MCB,
+ &Check);
+
+ while (Check.getNextErrInfo() == true) {
+ unsigned Reg = Check.getErrRegister();
+ Twine R(RI->getName(Reg));
+
+ uint64_t Err = Check.getError();
+ if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
+ if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
+ Error(IDLoc,
+ "unconditional branch cannot precede another branch in packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
+ HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
+ Error(IDLoc, "register `" + R +
+ "' used with `.new' "
+ "but not validly modified in the same packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
+ Error(IDLoc, "register `" + R + "' modified more than once");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
+ Error(IDLoc, "cannot write to read-only register `" + R + "'");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
+ Error(IDLoc, "loop-setup and some branch instructions "
+ "cannot be in the same packet");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
+ Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
+ Error(IDLoc, "packet marked with `:endloop" + N + "' " +
+ "cannot contain instructions that modify register " +
+ "`" + R + "'");
+ }
+
+ if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
+ Error(IDLoc,
+ "instruction cannot appear in packet with other instructions");
+
+ if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
+ Error(IDLoc, "too many slots used in packet");
+
+ if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
+ uint64_t Erm = Check.getShuffleError();
+
+ if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
+ Error(IDLoc, "invalid instruction packet");
+ else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
+ Error(IDLoc, "invalid instruction packet: too many stores");
+ else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
+ Error(IDLoc, "invalid instruction packet: too many loads");
+ else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
+ Error(IDLoc, "too many branches in packet");
+ else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
+ Error(IDLoc, "invalid instruction packet: out of slots");
+ else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
+ Error(IDLoc, "invalid instruction packet: slot error");
+ else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
+ Error(IDLoc, "v60 packet violation");
+ else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
+ Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
+ else
+ Error(IDLoc, "unknown error in instruction packet");
+ }
+ }
+
+ unsigned Warn = Check.getWarning();
+ if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) {
+ if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn)
+ Warning(IDLoc, "register `" + R + "' used with `.cur' "
+ "but not used in the same packet");
+ else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn)
+ Warning(IDLoc, "register `" + R + "' used with `.tmp' "
+ "but not used in the same packet");
+ }
+ }
+
+ if (CheckOk) {
+ MCB.setLoc(IDLoc);
+ if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
+ assert(!HexagonMCInstrInfo::isInnerLoop(MCB));
+ assert(!HexagonMCInstrInfo::isOuterLoop(MCB));
+ // Empty packets are valid yet aren't emitted
+ return false;
+ }
+ Out.EmitInstruction(MCB, getSTI());
+ } else {
+ // If compounding and duplexing didn't reduce the size below
+ // 4 or less we have a packet that is too big.
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
+ Error(IDLoc, "invalid instruction packet: out of slots");
+ return true; // Error
+ }
+ }
+
+ return false; // No error
+}
+
+bool HexagonAsmParser::matchBundleOptions() {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ while (true) {
+ if (!Parser.getTok().is(AsmToken::Colon))
+ return false;
+ Lexer.Lex();
+ StringRef Option = Parser.getTok().getString();
+ if (Option.compare_lower("endloop0") == 0)
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ else if (Option.compare_lower("endloop1") == 0)
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ else if (Option.compare_lower("mem_noshuf") == 0)
+ HexagonMCInstrInfo::setMemReorderDisabled(MCB);
+ else if (Option.compare_lower("mem_shuf") == 0)
+ HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
+ else
+ return true;
+ Lexer.Lex();
+ }
+}
+
+// For instruction aliases, immediates are generated as plain integers rather
+// than as MCConstantExpr. Convert them so all immediates are uniformly MCExpr.
+// Also check for signed/unsigned mismatches and warn about them.
+void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
+ MCInst NewInst;
+ NewInst.setOpcode(MCI.getOpcode());
+ for (MCOperand &I : MCI)
+ if (I.isImm()) {
+ int64_t Value (I.getImm());
+ if ((Value & 0x100000000) != (Value & 0x80000000)) {
+ // Detect flipped bit 33 wrt bit 32 and signal warning
+ Value ^= 0x100000000;
+ if (WarnSignedMismatch)
+ Warning (MCI.getLoc(), "Signed/Unsigned mismatch");
+ }
+ NewInst.addOperand(MCOperand::createExpr(
+ MCConstantExpr::create(Value, getContext())));
+ }
+ else
+ NewInst.addOperand(I);
+ MCI = NewInst;
+}
+
+bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
+ OperandVector &InstOperands,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm,
+ bool &MustExtend) {
+ // Perform matching with tablegen asmmatcher generated function
+ int result =
+ MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm);
+ if (result == Match_Success) {
+ MCI.setLoc(IDLoc);
+ MustExtend = mustExtend(InstOperands);
+ canonicalizeImmediates(MCI);
+ result = processInstruction(MCI, InstOperands, IDLoc, MustExtend);
+
+ DEBUG(dbgs() << "Insn:");
+ DEBUG(MCI.dump_pretty(dbgs()));
+ DEBUG(dbgs() << "\n\n");
+
+ MCI.setLoc(IDLoc);
+ }
+
+ // Create instruction operand for bundle instruction
+ // Break this into a separate function. The code here is less readable.
+ // Think about how to get an instruction error to report correctly.
+ // SMLoc will return the "{"
+ switch (result) {
+ default:
+ break;
+ case Match_Success:
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "invalid instruction");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction");
+ case Match_InvalidOperand:
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= InstOperands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get()))
+ ->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool HexagonAsmParser::mustExtend(OperandVector &Operands) {
+ unsigned Count = 0;
+ for (std::unique_ptr<MCParsedAsmOperand> &i : Operands)
+ if (i->isImm())
+ if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend)
+ ++Count;
+ // Multiple extenders should have been filtered by iss9Ext et al.
+ assert(Count < 2 && "Multiple extenders");
+ return Count == 1;
+}
+
+bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ if (!InBrackets) {
+ MCB.clear();
+ MCB.addOperand(MCOperand::createImm(0));
+ }
+ HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]);
+ if (FirstOperand.isToken() && FirstOperand.getToken() == "{") {
+ assert(Operands.size() == 1 && "Brackets should be by themselves");
+ if (InBrackets) {
+ getParser().Error(IDLoc, "Already in a packet");
+ return true;
+ }
+ InBrackets = true;
+ return false;
+ }
+ if (FirstOperand.isToken() && FirstOperand.getToken() == "}") {
+ assert(Operands.size() == 1 && "Brackets should be by themselves");
+ if (!InBrackets) {
+ getParser().Error(IDLoc, "Not in a packet");
+ return true;
+ }
+ InBrackets = false;
+ if (matchBundleOptions())
+ return true;
+ return finishBundle(IDLoc, Out);
+ }
+ MCInst *SubInst = new (getParser().getContext()) MCInst;
+ bool MustExtend = false;
+ if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
+ MatchingInlineAsm, MustExtend))
+ return true;
+ HexagonMCInstrInfo::extendIfNeeded(
+ getParser().getContext(), MCII, MCB, *SubInst,
+ HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend);
+ MCB.addOperand(MCOperand::createInst(SubInst));
+ if (!InBrackets)
+ return finishBundle(IDLoc, Out);
+ return false;
+}
+
+/// ParseDirective parses the Hexagon specific directives
+bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte"))
+ return ParseDirectiveValue(4, DirectiveID.getLoc());
+ if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" ||
+ IDVal.lower() == ".half")
+ return ParseDirectiveValue(2, DirectiveID.getLoc());
+ if (IDVal.lower() == ".falign")
+ return ParseDirectiveFalign(256, DirectiveID.getLoc());
+ if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon"))
+ return ParseDirectiveComm(true, DirectiveID.getLoc());
+ if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common"))
+ return ParseDirectiveComm(false, DirectiveID.getLoc());
+ if (IDVal.lower() == ".subsection")
+ return ParseDirectiveSubsection(DirectiveID.getLoc());
+
+ return true;
+}
+bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
+ const MCExpr *Subsection = 0;
+ int64_t Res;
+
+ assert((getLexer().isNot(AsmToken::EndOfStatement)) &&
+ "Invalid subsection directive");
+ getParser().parseExpression(Subsection);
+
+ if (!Subsection->evaluateAsAbsolute(Res))
+ return Error(L, "Cannot evaluate subsection number");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ // 0-8192 is the hard-coded range in MCObjectStreamer.cpp; this keeps the
+ // negative subsections together and in the same order, but at the opposite
+ // end of the section. Only legacy hexagon-gcc generated assembly code
+ // used negative subsections.
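+ // For example, ".subsection -1" ends up in subsection 8191 and
+ // ".subsection -8192" in subsection 0 (illustrative).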
+ if ((Res < 0) && (Res > -8193))
+ Subsection = MCConstantExpr::create(8192 + Res, this->getContext());
+
+ getStreamer().SubSection(Subsection);
+ return false;
+}
+
+/// ::= .falign [expression]
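+/// e.g. ".falign" or ".falign 8" -- align to a 16-byte boundary, emitting at
+/// most MaxBytesToFill padding bytes (15 by default); illustrative examples.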
+bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
+
+ int64_t MaxBytesToFill = 15;
+
+ // If there is an argument
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = L;
+
+ // Make sure we have a number (parseExpression returns false on success)
+ if (getParser().parseExpression(Value) == false) {
+ // Make sure this is a number that is in range
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue))
+ return Error(ExprLoc, "literal value out of range (256) for falign");
+ MaxBytesToFill = IntValue;
+ Lex();
+ } else {
+ return Error(ExprLoc, "not a valid expression for falign directive");
+ }
+ }
+
+ getTargetStreamer().emitFAlign(16, MaxBytesToFill);
+ Lex();
+
+ return false;
+}
+
+/// ::= .word [ expression (, expression)* ]
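+/// e.g. ".word 0x12345678, sym+4" emits two 4-byte values, the first as an
+/// integer and the second as a relocatable expression (illustrative).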
+bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ for (;;) {
+ const MCExpr *Value;
+ SMLoc ExprLoc = L;
+ if (getParser().parseExpression(Value))
+ return true;
+
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else
+ getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ // FIXME: Improve diagnostic.
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+ }
+
+ Lex();
+ return false;
+}
+
+// This is largely a copy of AsmParser's ParseDirectiveComm, extended to
+// accept a third argument, AccessAlignment, which indicates the smallest
+// memory access made to the symbol, expressed in bytes. If no
+// AccessAlignment is specified, it defaults to the Alignment value.
+// Hexagon's .lcomm:
+// .lcomm Symbol, Length, Alignment, AccessAlignment
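+// e.g. ".lcomm buffer, 256, 8, 4" reserves 256 bytes with 8-byte alignment
+// and a 4-byte minimum access size (illustrative example).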
+bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
+ // FIXME: need a better way to detect an AsmStreamer (upstream removed
+ // getKind())
+ if (getStreamer().hasRawTextSupport())
+ return true; // Only object file output requires special treatment.
+
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+
+ int64_t ByteAlignment = 1;
+ SMLoc ByteAlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ ByteAlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(ByteAlignment))
+ return true;
+ if (!isPowerOf2_64(ByteAlignment))
+ return Error(ByteAlignmentLoc, "alignment must be a power of 2");
+ }
+
+ int64_t AccessAlignment = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ // The optional access argument specifies the size of the smallest memory
+ // access to be made to the symbol, expressed in bytes.
+ SMLoc AccessAlignmentLoc;
+ Lex();
+ AccessAlignmentLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(AccessAlignment))
+ return true;
+
+ if (!isPowerOf2_64(AccessAlignment))
+ return Error(AccessAlignmentLoc, "access alignment must be a power of 2");
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+ Lex();
+
+ // NOTE: a size of zero for a .comm should create an undefined symbol,
+ // but a size of zero for a .lcomm creates a bss symbol of size zero.
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+ "be less than zero");
+
+ // NOTE: The alignment in the directive is a power-of-2 value; the assembler
+ // may internally end up wanting an alignment in bytes.
+ // FIXME: Diagnose overflow.
+ if (ByteAlignment < 0)
+ return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+ "alignment, can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(Loc, "invalid symbol redefinition");
+
+ HexagonMCELFStreamer &HexagonELFStreamer =
+ static_cast<HexagonMCELFStreamer &>(getStreamer());
+ if (IsLocal) {
+ HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment,
+ AccessAlignment);
+ return false;
+ }
+
+ HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment,
+ AccessAlignment);
+ return false;
+}
+
+// validate register against architecture
+bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
+ return true;
+}
+
+// extern "C" void LLVMInitializeHexagonAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeHexagonAsmParser() {
+ RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget);
+}
+
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_REGISTER_MATCHER
+#include "HexagonGenAsmMatcher.inc"
+
+namespace {
+bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) {
+ if (Index >= Operands.size())
+ return false;
+ MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1];
+ if (!Operand.isToken())
+ return false;
+ return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String);
+}
+bool previousIsLoop(OperandVector &Operands, size_t Index) {
+ return previousEqual(Operands, Index, "loop0") ||
+ previousEqual(Operands, Index, "loop1") ||
+ previousEqual(Operands, Index, "sp1loop0") ||
+ previousEqual(Operands, Index, "sp2loop0") ||
+ previousEqual(Operands, Index, "sp3loop0");
+}
+}
+
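+// Split a dotted identifier into separate tokens so the matcher sees each
+// piece, e.g. (illustrative) "p0.new" is pushed as "p0", ".", "new".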
+bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
+ AsmToken const &Token = getParser().getTok();
+ StringRef String = Token.getString();
+ SMLoc Loc = Token.getLoc();
+ getLexer().Lex();
+ do {
+ std::pair<StringRef, StringRef> HeadTail = String.split('.');
+ if (!HeadTail.first.empty())
+ Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc));
+ if (!HeadTail.second.empty())
+ Operands.push_back(HexagonOperand::CreateToken(
+ String.substr(HeadTail.first.size(), 1), Loc));
+ String = HeadTail.second;
+ } while (!String.empty());
+ return false;
+}
+
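+// Parse one operand. As a convenience, a predicate register that follows
+// "if" or "if !" without parentheses, e.g. (illustrative) "if p0 jump target",
+// gets synthetic "(" and ")" tokens so it matches like "if (p0) jump target".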
+bool HexagonAsmParser::parseOperand(OperandVector &Operands) {
+ unsigned Register;
+ SMLoc Begin;
+ SMLoc End;
+ MCAsmLexer &Lexer = getLexer();
+ if (!ParseRegister(Register, Begin, End)) {
+ if (!ErrorMissingParenthesis)
+ switch (Register) {
+ default:
+ break;
+ case Hexagon::P0:
+ case Hexagon::P1:
+ case Hexagon::P2:
+ case Hexagon::P3:
+ if (previousEqual(Operands, 0, "if")) {
+ if (WarnMissingParenthesis)
+ Warning (Begin, "Missing parenthesis around predicate register");
+ static char const *LParen = "(";
+ static char const *RParen = ")";
+ Operands.push_back(HexagonOperand::CreateToken(LParen, Begin));
+ Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+ AsmToken MaybeDotNew = Lexer.getTok();
+ if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+ MaybeDotNew.getString().equals_lower(".new"))
+ splitIdentifier(Operands);
+ Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+ return false;
+ }
+ if (previousEqual(Operands, 0, "!") &&
+ previousEqual(Operands, 1, "if")) {
+ if (WarnMissingParenthesis)
+ Warning (Begin, "Missing parenthesis around predicate register");
+ static char const *LParen = "(";
+ static char const *RParen = ")";
+ Operands.insert(Operands.end () - 1,
+ HexagonOperand::CreateToken(LParen, Begin));
+ Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+ AsmToken MaybeDotNew = Lexer.getTok();
+ if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+ MaybeDotNew.getString().equals_lower(".new"))
+ splitIdentifier(Operands);
+ Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+ return false;
+ }
+ break;
+ }
+ Operands.push_back(HexagonOperand::CreateReg(
+ Register, Begin, End));
+ return false;
+ }
+ return splitIdentifier(Operands);
+}
+
+bool HexagonAsmParser::isLabel(AsmToken &Token) {
+ MCAsmLexer &Lexer = getLexer();
+ AsmToken const &Second = Lexer.getTok();
+ AsmToken Third = Lexer.peekTok();
+ StringRef String = Token.getString();
+ if (Token.is(AsmToken::TokenKind::LCurly) ||
+ Token.is(AsmToken::TokenKind::RCurly))
+ return false;
+ if (!Token.is(AsmToken::TokenKind::Identifier))
+ return true;
+ if (!MatchRegisterName(String.lower()))
+ return true;
+ (void)Second;
+ assert(Second.is(AsmToken::Colon));
+ StringRef Raw (String.data(), Third.getString().data() - String.data() +
+ Third.getString().size());
+ std::string Collapsed = Raw;
+ Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+ Collapsed.end());
+ StringRef Whole = Collapsed;
+ std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
+ if (!MatchRegisterName(DotSplit.first.lower()))
+ return true;
+ return false;
+}
+
+bool HexagonAsmParser::handleNoncontigiousRegister(bool Contigious, SMLoc &Loc) {
+ if (!Contigious && ErrorNoncontigiousRegister) {
+ Error(Loc, "Register name is not contiguous");
+ return true;
+ }
+ if (!Contigious && WarnNoncontigiousRegister)
+ Warning(Loc, "Register name is not contiguous");
+ return false;
+}
+
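+// Reassemble a register name that the lexer may have split across tokens.
+// Illustrative examples: "r1:0" arrives as "r1", ":", "0" and is matched as a
+// whole (or, failing that, split again at the ':'), while "r0.new" matches r0
+// and pushes ".new" back for the operand parser.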
+bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ MCAsmLexer &Lexer = getLexer();
+ StartLoc = getLexer().getLoc();
+ SmallVector<AsmToken, 5> Lookahead;
+ StringRef RawString(Lexer.getTok().getString().data(), 0);
+ bool Again = Lexer.is(AsmToken::Identifier);
+ bool NeededWorkaround = false;
+ while (Again) {
+ AsmToken const &Token = Lexer.getTok();
+ RawString = StringRef(RawString.data(),
+ Token.getString().data() - RawString.data () +
+ Token.getString().size());
+ Lookahead.push_back(Token);
+ Lexer.Lex();
+ bool Contigious = Lexer.getTok().getString().data() ==
+ Lookahead.back().getString().data() +
+ Lookahead.back().getString().size();
+ bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) ||
+ Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) ||
+ Lexer.is(AsmToken::Colon);
+ bool Workaround = Lexer.is(AsmToken::Colon) ||
+ Lookahead.back().is(AsmToken::Colon);
+ Again = (Contigious && Type) || (Workaround && Type);
+ NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type));
+ }
+ std::string Collapsed = RawString;
+ Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+ Collapsed.end());
+ StringRef FullString = Collapsed;
+ std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
+ unsigned DotReg = MatchRegisterName(DotSplit.first.lower());
+ if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) {
+ if (DotSplit.second.empty()) {
+ RegNo = DotReg;
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ } else {
+ RegNo = DotReg;
+ size_t First = RawString.find('.');
+ StringRef DotString (RawString.data() + First, RawString.size() - First);
+ Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString));
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ }
+ }
+ std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':');
+ unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower());
+ if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(ColonReg)) {
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ RegNo = ColonReg;
+ EndLoc = Lexer.getLoc();
+ if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+ return true;
+ return false;
+ }
+ while (!Lookahead.empty()) {
+ Lexer.UnLex(Lookahead.back());
+ Lookahead.pop_back();
+ }
+ return true;
+}
+
+bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
+ if (previousEqual(Operands, 0, "call"))
+ return true;
+ if (previousEqual(Operands, 0, "jump"))
+ if (!getLexer().getTok().is(AsmToken::Colon))
+ return true;
+ if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1))
+ return true;
+ if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") &&
+ (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t")))
+ return true;
+ return false;
+}
+
+bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) {
+ llvm::SmallVector<AsmToken, 4> Tokens;
+ MCAsmLexer &Lexer = getLexer();
+ bool Done = false;
+ static char const * Comma = ",";
+ do {
+ Tokens.emplace_back (Lexer.getTok());
+ Lexer.Lex();
+ switch (Tokens.back().getKind())
+ {
+ case AsmToken::TokenKind::Hash:
+ if (Tokens.size () > 1)
+ if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) {
+ Tokens.insert(Tokens.end() - 2,
+ AsmToken(AsmToken::TokenKind::Comma, Comma));
+ Done = true;
+ }
+ break;
+ case AsmToken::TokenKind::RCurly:
+ case AsmToken::TokenKind::EndOfStatement:
+ case AsmToken::TokenKind::Eof:
+ Done = true;
+ break;
+ default:
+ break;
+ }
+ } while (!Done);
+ while (!Tokens.empty()) {
+ Lexer.UnLex(Tokens.back());
+ Tokens.pop_back();
+ }
+ return getParser().parseExpression(Expr);
+}
+
+bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) {
+ if (implicitExpressionLocation(Operands)) {
+ MCAsmParser &Parser = getParser();
+ SMLoc Loc = Parser.getLexer().getLoc();
+ std::unique_ptr<HexagonOperand> Expr =
+ HexagonOperand::CreateImm(nullptr, Loc, Loc);
+ MCExpr const *& Val = Expr->Imm.Val;
+ Operands.push_back(std::move(Expr));
+ return parseExpression(Val);
+ }
+ return parseOperand(Operands);
+}
+
+/// Parse an instruction.
+bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
+ MCAsmParser &Parser = getParser();
+ MCAsmLexer &Lexer = getLexer();
+ while (true) {
+ AsmToken const &Token = Parser.getTok();
+ switch (Token.getKind()) {
+ case AsmToken::EndOfStatement: {
+ Lexer.Lex();
+ return false;
+ }
+ case AsmToken::LCurly: {
+ if (!Operands.empty())
+ return true;
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ return false;
+ }
+ case AsmToken::RCurly: {
+ if (Operands.empty()) {
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ }
+ return false;
+ }
+ case AsmToken::Comma: {
+ Lexer.Lex();
+ continue;
+ }
+ case AsmToken::EqualEqual:
+ case AsmToken::ExclaimEqual:
+ case AsmToken::GreaterEqual:
+ case AsmToken::GreaterGreater:
+ case AsmToken::LessEqual:
+ case AsmToken::LessLess: {
+ Operands.push_back(HexagonOperand::CreateToken(
+ Token.getString().substr(0, 1), Token.getLoc()));
+ Operands.push_back(HexagonOperand::CreateToken(
+ Token.getString().substr(1, 1), Token.getLoc()));
+ Lexer.Lex();
+ continue;
+ }
+ case AsmToken::Hash: {
+ bool MustNotExtend = false;
+ bool ImplicitExpression = implicitExpressionLocation(Operands);
+ std::unique_ptr<HexagonOperand> Expr = HexagonOperand::CreateImm(
+ nullptr, Lexer.getLoc(), Lexer.getLoc());
+ if (!ImplicitExpression)
+ Operands.push_back(
+ HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
+ Lexer.Lex();
+ bool MustExtend = false;
+ bool HiOnly = false;
+ bool LoOnly = false;
+ if (Lexer.is(AsmToken::Hash)) {
+ Lexer.Lex();
+ MustExtend = true;
+ } else if (ImplicitExpression)
+ MustNotExtend = true;
+ AsmToken const &Token = Parser.getTok();
+ if (Token.is(AsmToken::Identifier)) {
+ StringRef String = Token.getString();
+ AsmToken IDToken = Token;
+ if (String.lower() == "hi") {
+ HiOnly = true;
+ } else if (String.lower() == "lo") {
+ LoOnly = true;
+ }
+ if (HiOnly || LoOnly) {
+ AsmToken LParen = Lexer.peekTok();
+ if (!LParen.is(AsmToken::LParen)) {
+ HiOnly = false;
+ LoOnly = false;
+ } else {
+ Lexer.Lex();
+ }
+ }
+ }
+ if (parseExpression(Expr->Imm.Val))
+ return true;
+ int64_t Value;
+ MCContext &Context = Parser.getContext();
+ assert(Expr->Imm.Val != nullptr);
+ if (Expr->Imm.Val->evaluateAsAbsolute(Value)) {
+ if (HiOnly)
+ Expr->Imm.Val = MCBinaryExpr::createLShr(
+ Expr->Imm.Val, MCConstantExpr::create(16, Context), Context);
+ if (HiOnly || LoOnly)
+ Expr->Imm.Val = MCBinaryExpr::createAnd(
+ Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context);
+ }
+ if (MustNotExtend)
+ Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context);
+ Expr->Imm.MustExtend = MustExtend;
+ Operands.push_back(std::move(Expr));
+ continue;
+ }
+ default:
+ break;
+ }
+ if (parseExpressionOrOperand(Operands))
+ return true;
+ }
+}
+
+bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name,
+ AsmToken ID,
+ OperandVector &Operands) {
+ getLexer().UnLex(ID);
+ return parseInstruction(Operands);
+}
+
+namespace {
+MCInst makeCombineInst(int opCode, MCOperand &Rdd,
+ MCOperand &MO1, MCOperand &MO2) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(opCode);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(MO1);
+ TmpInst.addOperand(MO2);
+
+ return TmpInst;
+}
+}
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ HexagonOperand *Op = static_cast<HexagonOperand *>(&AsmOp);
+
+ switch (Kind) {
+ case MCK_0: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ case MCK_1: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ case MCK__MINUS_1: {
+ int64_t Value;
+ return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1
+ ? Match_Success
+ : Match_InvalidOperand;
+ }
+ }
+ if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) {
+ StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length);
+ if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind)
+ return Match_Success;
+ if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind)
+ return Match_Success;
+ }
+
+ DEBUG(dbgs() << "Unmatched Operand:");
+ DEBUG(Op->dump());
+ DEBUG(dbgs() << "\n");
+
+ return Match_InvalidOperand;
+}
+
+void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) {
+ std::string errStr;
+ raw_string_ostream ES(errStr);
+ ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: ";
+ if (Max >= 0)
+ ES << "0-" << Max;
+ else
+ ES << Max << "-" << (-Max - 1);
+ Error(IDLoc, ES.str().c_str());
+}
+
+int HexagonAsmParser::processInstruction(MCInst &Inst,
+ OperandVector const &Operands,
+ SMLoc IDLoc, bool &MustExtend) {
+ MCContext &Context = getParser().getContext();
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ std::string r = "r";
+ std::string v = "v";
+ std::string Colon = ":";
+
+ bool is32bit = false; // used to distinguish between CONST32 and CONST64
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+
+ case Hexagon::M4_mpyrr_addr:
+ case Hexagon::S4_addi_asl_ri:
+ case Hexagon::S4_addi_lsr_ri:
+ case Hexagon::S4_andi_asl_ri:
+ case Hexagon::S4_andi_lsr_ri:
+ case Hexagon::S4_ori_asl_ri:
+ case Hexagon::S4_ori_lsr_ri:
+ case Hexagon::S4_or_andix:
+ case Hexagon::S4_subi_asl_ri:
+ case Hexagon::S4_subi_lsr_ri: {
+ MCOperand &Ry = Inst.getOperand(0);
+ MCOperand &src = Inst.getOperand(2);
+ if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg()))
+ return Match_InvalidOperand;
+ break;
+ }
+
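+ // "cmp.ge" style aliases are mapped onto "cmp.gt" with the immediate
+ // decremented, e.g. (illustrative) cmp.ge(r1, #5) becomes cmp.gt(r1, #4);
+ // the unsigned form with #0 becomes cmp.eq of the register with itself.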
+ case Hexagon::C2_cmpgei: {
+ MCOperand &MO = Inst.getOperand(2);
+ MO.setExpr(MCBinaryExpr::createSub(
+ MO.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::C2_cmpgti);
+ break;
+ }
+
+ case Hexagon::C2_cmpgeui: {
+ MCOperand &MO = Inst.getOperand(2);
+ int64_t Value;
+ bool Success = MO.getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success && "Assured by matcher");
+ if (Value == 0) {
+ MCInst TmpInst;
+ MCOperand &Pd = Inst.getOperand(0);
+ MCOperand &Rt = Inst.getOperand(1);
+ TmpInst.setOpcode(Hexagon::C2_cmpeq);
+ TmpInst.addOperand(Pd);
+ TmpInst.addOperand(Rt);
+ TmpInst.addOperand(Rt);
+ Inst = TmpInst;
+ } else {
+ MO.setExpr(MCBinaryExpr::createSub(
+ MO.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::C2_cmpgtui);
+ }
+ break;
+ }
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0i: {
+ MCOperand &MO = Inst.getOperand(0);
+ // Loop has different opcodes for extended vs not extended, but we should
+ // not use the other opcode as it is a legacy artifact of TD files.
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value)) {
+ // if the operand can fit within a 7:2 field
+ if (Value < (1 << 8) && Value >= -(1 << 8)) {
+ SMLoc myLoc = Operands[2]->getStartLoc();
+ // A '#' is left in startLoc in the case of '##';
+ // if '##' is found, force extension.
+ if (*myLoc.getPointer() == '#') {
+ MustExtend = true;
+ break;
+ }
+ } else {
+ // If immediate and out of 7:2 range.
+ MustExtend = true;
+ }
+ }
+ break;
+ }
+
+ // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
+ case Hexagon::A2_tfrp: {
+ MCOperand &MO = Inst.getOperand(1);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode(Hexagon::A2_combinew);
+ break;
+ }
+
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf: {
+ MCOperand &MO = Inst.getOperand(2);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
+ ? Hexagon::C2_ccombinewt
+ : Hexagon::C2_ccombinewf);
+ break;
+ }
+ case Hexagon::A2_tfrptnew:
+ case Hexagon::A2_tfrpfnew: {
+ MCOperand &MO = Inst.getOperand(2);
+ unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ MO.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
+ ? Hexagon::C2_ccombinewnewt
+ : Hexagon::C2_ccombinewnewf);
+ break;
+ }
+
+ // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) "
+ case Hexagon::CONST32:
+ case Hexagon::CONST32_Float_Real:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::FCONST32_nsdata:
+ is32bit = true;
+ // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) "
+ case Hexagon::CONST64_Float_Real:
+ case Hexagon::CONST64_Int_Real:
+
+ // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+ if (!Parser.getStreamer().hasRawTextSupport()) {
+ MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+ MCOperand &MO_1 = Inst.getOperand(1);
+ MCOperand &MO_0 = Inst.getOperand(0);
+
+ // push section onto section stack
+ MES->PushSection();
+
+ std::string myCharStr;
+ MCSectionELF *mySection;
+
+ // Check whether this is an immediate or a symbol.
+ int64_t Value;
+ bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value);
+ if (Absolute) {
+ // Create a new section - one for each constant
+ // Some or all of the zeros are replaced with the given immediate.
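+ // e.g. (illustrative) a 32-bit value of 0x12 would use the section
+ // .gnu.linkonce.l4.CONST_00000012.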
+ if (is32bit) {
+ std::string myImmStr = utohexstr(static_cast<uint32_t>(Value));
+ myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000")
+ .drop_back(myImmStr.size())
+ .str() +
+ myImmStr;
+ } else {
+ std::string myImmStr = utohexstr(Value);
+ myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000")
+ .drop_back(myImmStr.size())
+ .str() +
+ myImmStr;
+ }
+
+ mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+ } else if (MO_1.isExpr()) {
+ // .lita - for expressions
+ myCharStr = ".lita";
+ mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+ } else
+ llvm_unreachable("unexpected type of machine operand!");
+
+ MES->SwitchSection(mySection);
+ unsigned byteSize = is32bit ? 4 : 8;
+ getStreamer().EmitCodeAlignment(byteSize, byteSize);
+
+ MCSymbol *Sym;
+
+ // for symbols, get rid of prepended ".gnu.linkonce.lx."
+
+ // emit symbol if needed
+ if (Absolute) {
+ Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16));
+ if (Sym->isUndefined()) {
+ getStreamer().EmitLabel(Sym);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
+ getStreamer().EmitIntValue(Value, byteSize);
+ }
+ } else if (MO_1.isExpr()) {
+ const char *StringStart = 0;
+ const char *StringEnd = 0;
+ if (*Operands[4]->getStartLoc().getPointer() == '#') {
+ StringStart = Operands[5]->getStartLoc().getPointer();
+ StringEnd = Operands[6]->getStartLoc().getPointer();
+ } else { // no pound
+ StringStart = Operands[4]->getStartLoc().getPointer();
+ StringEnd = Operands[5]->getStartLoc().getPointer();
+ }
+
+ unsigned size = StringEnd - StringStart;
+ std::string DotConst = ".CONST_";
+ Sym = getContext().getOrCreateSymbol(DotConst +
+ StringRef(StringStart, size));
+
+ if (Sym->isUndefined()) {
+ // case where symbol is not yet defined: emit symbol
+ getStreamer().EmitLabel(Sym);
+ getStreamer().EmitSymbolAttribute(Sym, MCSA_Local);
+ getStreamer().EmitValue(MO_1.getExpr(), 4);
+ }
+ } else
+ llvm_unreachable("unexpected type of machine operand!");
+
+ MES->PopSection();
+
+ if (Sym) {
+ MCInst TmpInst;
+ if (is32bit) // 32 bit
+ TmpInst.setOpcode(Hexagon::L2_loadrigp);
+ else // 64 bit
+ TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+
+ TmpInst.addOperand(MO_0);
+ TmpInst.addOperand(
+ MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext())));
+ Inst = TmpInst;
+ }
+ }
+ break;
+
+ // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)"
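+ // e.g. (illustrative) "r1:0 = #-2" becomes "r1:0 = combine(#-1, #-2)" and
+ // "r1:0 = #7" becomes "r1:0 = combine(#0, #7)".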
+ case Hexagon::A2_tfrpi: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO = Inst.getOperand(1);
+ int64_t Value;
+ int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? -1 : 0;
+ MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context)));
+ Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO);
+ break;
+ }
+
+ // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)"
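+ // e.g. (illustrative) a constant of 0x100000005 becomes combine(#1, #5);
+ // the upper word must fit in a signed 8-bit field.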
+ case Hexagon::TFRI64_V4: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO = Inst.getOperand(1);
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value)) {
+ unsigned long long u64 = Value;
+ signed int s8 = (u64 >> 32) & 0xFFFFFFFF;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ MCOperand imm(MCOperand::createExpr(
+ MCConstantExpr::create(s8, Context))); // upper 32
+ MCOperand imm2(MCOperand::createExpr(
+ MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2);
+ } else {
+ MCOperand imm(MCOperand::createExpr(
+ MCConstantExpr::create(0, Context))); // upper 32
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO);
+ }
+ break;
+ }
+
+ // Handle "$Rdd = combine(##imm, #imm)"
+ case Hexagon::TFRI64_V2_ext: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO1 = Inst.getOperand(1);
+ MCOperand &MO2 = Inst.getOperand(2);
+ int64_t Value;
+ if (MO2.getExpr()->evaluateAsAbsolute(Value)) {
+ int s8 = Value;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ }
+ Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2);
+ break;
+ }
+
+ // Handle "$Rdd = combine(#imm, ##imm)"
+ case Hexagon::A4_combineii: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &MO1 = Inst.getOperand(1);
+ int64_t Value;
+ if (MO1.getExpr()->evaluateAsAbsolute(Value)) {
+ int s8 = Value;
+ if (s8 < -128 || s8 > 127)
+ OutOfRange(IDLoc, s8, -128);
+ }
+ MCOperand &MO2 = Inst.getOperand(2);
+ Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2);
+ break;
+ }
+
+ case Hexagon::S2_tableidxb_goodsyntax: {
+ Inst.setOpcode(Hexagon::S2_tableidxb);
+ break;
+ }
+
+ case Hexagon::S2_tableidxh_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(1, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxh);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_tableidxw_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(2, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxw);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_tableidxd_goodsyntax: {
+ MCInst TmpInst;
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &_dst_ = Inst.getOperand(1);
+ MCOperand &Rs = Inst.getOperand(2);
+ MCOperand &Imm4 = Inst.getOperand(3);
+ MCOperand &Imm6 = Inst.getOperand(4);
+ Imm6.setExpr(MCBinaryExpr::createSub(
+ Imm6.getExpr(), MCConstantExpr::create(3, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_tableidxd);
+ TmpInst.addOperand(Rx);
+ TmpInst.addOperand(_dst_);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm4);
+ TmpInst.addOperand(Imm6);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::M2_mpyui: {
+ Inst.setOpcode(Hexagon::M2_mpyi);
+ break;
+ }
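+ // mpyi with a signed immediate selects M2_mpysin for small negative values
+ // and M2_mpysip otherwise, e.g. (illustrative) mpyi(r1, #-3) uses M2_mpysin
+ // with #3 while mpyi(r1, #3) uses M2_mpysip.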
+ case Hexagon::M2_mpysmi: {
+ MCInst TmpInst;
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (!MustExtend) {
+ if (Value < 0 && Value > -256) {
+ Imm.setExpr(MCConstantExpr::create(Value * -1, Context));
+ TmpInst.setOpcode(Hexagon::M2_mpysin);
+ } else if (Value < 256 && Value >= 0)
+ TmpInst.setOpcode(Hexagon::M2_mpysip);
+ else
+ return Match_InvalidOperand;
+ } else {
+ if (Value >= 0)
+ TmpInst.setOpcode(Hexagon::M2_mpysip);
+ else
+ return Match_InvalidOperand;
+ }
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_asr_i_r_rnd_goodsyntax: {
+ MCOperand &Imm = Inst.getOperand(2);
+ MCInst TmpInst;
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) { // convert to $Rd = $Rs
+ TmpInst.setOpcode(Hexagon::A2_tfr);
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd);
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(Rs);
+ TmpInst.addOperand(Imm);
+ }
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::S2_asr_i_p_rnd_goodsyntax: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1])
+ MCInst TmpInst;
+ unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ Rss.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst = TmpInst;
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S2_asr_i_p_rnd);
+ }
+ break;
+ }
+
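+ // An odd source register selects the _hi form with the pair rodd:odd-1,
+ // an even one the _lo form, e.g. (illustrative) r3 gives _hi with r3:2 and
+ // r2 gives _lo with r3:2. The same mapping is used for A2_addsp and the
+ // M2_vrcmpys cases below.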
+ case Hexagon::A4_boundscheck: {
+ MCOperand &Rs = Inst.getOperand(1);
+ unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
+ if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2
+ Inst.setOpcode(Hexagon::A4_boundscheck_hi);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ } else { // raw:lo
+ Inst.setOpcode(Hexagon::A4_boundscheck_lo);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::A2_addsp: {
+ MCOperand &Rs = Inst.getOperand(1);
+ unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
+ if (RegNum & 1) { // Odd mapped to raw:hi
+ Inst.setOpcode(Hexagon::A2_addsph);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped raw:lo
+ Inst.setOpcode(Hexagon::A2_addspl);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rs.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_s1: {
+ MCOperand &Rt = Inst.getOperand(2);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to sat:raw:hi
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped sat:raw:lo
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_acc_s1: {
+ MCInst TmpInst;
+ MCOperand &Rxx = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(2);
+ MCOperand &Rt = Inst.getOperand(3);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to sat:raw:hi
+ TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped sat:raw:lo
+ TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ // Registers are in different positions
+ TmpInst.addOperand(Rxx);
+ TmpInst.addOperand(Rxx);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(Rt);
+ Inst = TmpInst;
+ break;
+ }
+
+ case Hexagon::M2_vrcmpys_s1rp: {
+ MCOperand &Rt = Inst.getOperand(2);
+ unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
+ if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h);
+ std::string Name =
+ r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ } else { // Even mapped rnd:sat:raw:lo
+ Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l);
+ std::string Name =
+ r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum);
+ StringRef RegPair = Name;
+ Rt.setReg(MatchRegisterName(RegPair));
+ }
+ break;
+ }
+
+ case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0)
+ Inst.setOpcode(Hexagon::S2_vsathub);
+ else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat);
+ }
+ break;
+ }
+
+ case Hexagon::S5_vasrhrnd_goodsyntax: {
+ MCOperand &Rdd = Inst.getOperand(0);
+ MCOperand &Rss = Inst.getOperand(1);
+ MCOperand &Imm = Inst.getOperand(2);
+ int64_t Value;
+ bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);
+ (void)Absolute;
+ if (Value == 0) {
+ MCInst TmpInst;
+ unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
+ std::string R1 = r + llvm::utostr_32(RegPairNum + 1);
+ StringRef Reg1(R1);
+ Rss.setReg(MatchRegisterName(Reg1));
+ // Add a new operand for the second register in the pair.
+ std::string R2 = r + llvm::utostr_32(RegPairNum);
+ StringRef Reg2(R2);
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(Rdd);
+ TmpInst.addOperand(Rss);
+ TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2)));
+ Inst = TmpInst;
+ } else {
+ Imm.setExpr(MCBinaryExpr::createSub(
+ Imm.getExpr(), MCConstantExpr::create(1, Context), Context));
+ Inst.setOpcode(Hexagon::S5_vasrhrnd);
+ }
+ break;
+ }
+
+ case Hexagon::A2_not: {
+ MCInst TmpInst;
+ MCOperand &Rd = Inst.getOperand(0);
+ MCOperand &Rs = Inst.getOperand(1);
+ TmpInst.setOpcode(Hexagon::A2_subri);
+ TmpInst.addOperand(Rd);
+ TmpInst.addOperand(
+ MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
+ TmpInst.addOperand(Rs);
+ Inst = TmpInst;
+ break;
+ }
+ } // switch
+
+ return Match_Success;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
index cb7e633..ea96eb0 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -868,7 +868,7 @@ void BT::visitNonBranch(const MachineInstr *MI) {
continue;
bool Changed = false;
- if (!Eval || !ResMap.has(RD.Reg)) {
+ if (!Eval || ResMap.count(RD.Reg) == 0) {
// Set to "ref" (aka "bottom").
uint16_t DefBW = ME.getRegBitWidth(RD);
RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW);
@@ -951,11 +951,11 @@ void BT::visitBranchesFrom(const MachineInstr *BI) {
// be processed.
for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) {
const MachineBasicBlock *SB = *I;
- if (SB->isLandingPad())
+ if (SB->isEHPad())
Targets.insert(SB);
}
if (FallsThrough) {
- MachineFunction::const_iterator BIt = &B;
+ MachineFunction::const_iterator BIt = B.getIterator();
MachineFunction::const_iterator Next = std::next(BIt);
if (Next != MF.end())
Targets.insert(&*Next);
@@ -1005,7 +1005,7 @@ void BT::put(RegisterRef RR, const RegisterCell &RC) {
// Replace all references to bits from OldRR with the corresponding bits
// in NewRR.
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
- assert(Map.has(OldRR.Reg) && "OldRR not present in map");
+ assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map");
BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub);
BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub);
uint16_t OMB = OM.first(), OME = OM.last();
@@ -1104,9 +1104,9 @@ void BT::run() {
}
// If block end has been reached, add the fall-through edge to the queue.
if (It == End) {
- MachineFunction::const_iterator BIt = &B;
+ MachineFunction::const_iterator BIt = B.getIterator();
MachineFunction::const_iterator Next = std::next(BIt);
- if (Next != MF.end()) {
+ if (Next != MF.end() && B.isSuccessor(&*Next)) {
int ThisN = B.getNumber();
int NextN = Next->getNumber();
FlowQ.push(CFGEdge(ThisN, NextN));
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
index ed002a7..959c831 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
@@ -36,9 +36,7 @@ struct BitTracker {
typedef SetVector<const MachineBasicBlock *> BranchTargetList;
- struct CellMapType : public std::map<unsigned,RegisterCell> {
- bool has(unsigned Reg) const;
- };
+ typedef std::map<unsigned, RegisterCell> CellMapType;
BitTracker(const MachineEvaluator &E, MachineFunction &F);
~BitTracker();
@@ -79,7 +77,6 @@ private:
// Abstraction of a reference to bit at position Pos from a register Reg.
struct BitTracker::BitRef {
BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
- BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {}
bool operator== (const BitRef &BR) const {
// If Reg is 0, disregard Pos.
return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
@@ -146,7 +143,6 @@ struct BitTracker::BitValue {
BitValue(ValueType T = Top) : Type(T) {}
BitValue(bool B) : Type(B ? One : Zero) {}
- BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {}
BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {}
bool operator== (const BitValue &V) const {
@@ -279,11 +275,6 @@ struct BitTracker::RegisterCell {
return !operator==(RC);
}
- const RegisterCell &operator=(const RegisterCell &RC) {
- Bits = RC.Bits;
- return *this;
- }
-
// Generate a "ref" cell for the corresponding register. In the resulting
// cell each bit will be described as being the same as the corresponding
// bit in register Reg (i.e. the cell is "defined" by register Reg).
@@ -344,11 +335,6 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) {
return RC;
}
-
-inline bool BitTracker::CellMapType::has(unsigned Reg) const {
- return find(Reg) != end();
-}
-
// A class to evaluate target's instructions and update the cell maps.
// This is used internally by the bit tracker. A target that wants to
// utilize this should implement the evaluation functions (noted below)
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 9cc1e94..4a9c341 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -7,42 +7,45 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "hexagon-disassembler"
+
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
-
-#include "llvm/MC/MCContext.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/raw_ostream.h"
-#include <array>
+#include "llvm/Support/TargetRegistry.h"
#include <vector>
using namespace llvm;
using namespace Hexagon;
-#define DEBUG_TYPE "hexagon-disassembler"
-
-// Pull DecodeStatus and its enum values into the global namespace.
-typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
+typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
/// \brief Hexagon disassembler for all Hexagon platforms.
class HexagonDisassembler : public MCDisassembler {
public:
+ std::unique_ptr<MCInstrInfo const> const MCII;
std::unique_ptr<MCInst *> CurrentBundle;
- HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx)
- : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {}
+ HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {}
DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -52,23 +55,57 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
+
+ void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const;
+ void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const;
};
}
-static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+// Forward declare these because the auto-generated code will reference them.
+// Definitions are further down.
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
- void const *Decoder);
+ const void *Decoder);
+
+static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn);
+static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
+ void const *Decoder);
static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
raw_ostream &os);
-static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst);
+static unsigned getRegFromSubinstEncoding(unsigned encoded_reg);
+
+static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t Address, const void *Decoder);
static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
@@ -95,129 +132,19 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-
-static const uint16_t IntRegDecoderTable[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
- Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
- Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
- Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
- Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
- Hexagon::R30, Hexagon::R31};
-
-static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1,
- Hexagon::P2, Hexagon::P3};
-
-static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
- const uint16_t Table[], size_t Size) {
- if (RegNo < Size) {
- Inst.addOperand(MCOperand::createReg(Table[RegNo]));
- return MCDisassembler::Success;
- } else
- return MCDisassembler::Fail;
-}
-
-static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
- if (RegNo > 31)
- return MCDisassembler::Fail;
-
- unsigned Register = IntRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
- static const uint16_t CtrlRegDecoderTable[] = {
- Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
- Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7,
- Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
- Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH};
-
- if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0]))
- return MCDisassembler::Fail;
-
- if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
- return MCDisassembler::Fail;
-
- unsigned Register = CtrlRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
- static const uint16_t CtrlReg64DecoderTable[] = {
- Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2,
- Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister,
- Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8,
- Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister,
- Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC,
- Hexagon::NoRegister};
-
- if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0]))
- return MCDisassembler::Fail;
-
- if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
- return MCDisassembler::Fail;
-
- unsigned Register = CtrlReg64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
- unsigned Register = 0;
- switch (RegNo) {
- case 0:
- Register = Hexagon::M0;
- break;
- case 1:
- Register = Hexagon::M1;
- break;
- default:
- return MCDisassembler::Fail;
- }
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- const void *Decoder) {
- static const uint16_t DoubleRegDecoderTable[] = {
- Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
- Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
- Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
- Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15};
-
- return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable,
- sizeof(DoubleRegDecoderTable)));
-}
-
-static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t /*Address*/,
- void const *Decoder) {
- if (RegNo > 3)
- return MCDisassembler::Fail;
-
- unsigned Register = PredRegDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return MCDisassembler::Success;
-}
+static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder);
#include "HexagonGenDisassemblerTables.inc"
-static MCDisassembler *createHexagonDisassembler(Target const &T,
- MCSubtargetInfo const &STI,
+static MCDisassembler *createHexagonDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new HexagonDisassembler(STI, Ctx);
+ return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo());
}
extern "C" void LLVMInitializeHexagonDisassembler() {
@@ -235,8 +162,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Size = 0;
*CurrentBundle = &MI;
- MI.setOpcode(Hexagon::BUNDLE);
- MI.addOperand(MCOperand::createImm(0));
+ MI = HexagonMCInstrInfo::createBundle();
while (Result == Success && Complete == false) {
if (Bytes.size() < HEXAGON_INSTR_SIZE)
return MCDisassembler::Fail;
@@ -246,7 +172,21 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Size += HEXAGON_INSTR_SIZE;
Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
}
- return Result;
+ if(Result == MCDisassembler::Fail)
+ return Result;
+ HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo());
+ if(!Checker.check())
+ return MCDisassembler::Fail;
+ return MCDisassembler::Success;
+}
+
+namespace {
+HexagonDisassembler const &disassembler(void const *Decoder) {
+ return *static_cast<HexagonDisassembler const *>(Decoder);
+}
+MCContext &contextFromDecoder(void const *Decoder) {
+ return disassembler(Decoder).getContext();
+}
}
DecodeStatus HexagonDisassembler::getSingleInstruction(
@@ -255,8 +195,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
assert(Bytes.size() >= HEXAGON_INSTR_SIZE);
uint32_t Instruction =
- llvm::support::endian::read<uint32_t, llvm::support::little,
- llvm::support::unaligned>(Bytes.data());
+ (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB);
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
@@ -360,8 +299,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
MILow->setOpcode(opLow);
MCInst *MIHigh = new (getContext()) MCInst;
MIHigh->setOpcode(opHigh);
- AddSubinstOperands(MILow, opLow, instLow);
- AddSubinstOperands(MIHigh, opHigh, instHigh);
+ addSubinstOperands(MILow, opLow, instLow);
+ addSubinstOperands(MIHigh, opHigh, instHigh);
// see ConvertToSubInst() in
// lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -378,102 +317,774 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
// Calling the auto-generated decoder function.
Result =
decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI);
+
+  // If a "standard" insn isn't found, check special cases.
+ if (MCDisassembler::Success != Result ||
+ MI.getOpcode() == Hexagon::A4_ext) {
+ Result = decodeImmext(MI, Instruction, this);
+ if (MCDisassembler::Success != Result) {
+ Result = decodeSpecial(MI, Instruction);
+ }
+ } else {
+    // If the instruction is a compound, its register fields follow the
+    // duplex model, so the register values in the MCInst are incorrect.
+    // Loop through the operands and remap the registers appropriately.
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) ==
+ HexagonII::TypeCOMPOUND) {
+ for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) {
+ if (i->isReg()) {
+ unsigned reg = i->getReg() - Hexagon::R0;
+ i->setReg(getRegFromSubinstEncoding(reg));
+ }
+ }
+ }
+ }
+ }
+
+ if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) {
+ unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI);
+ MCOperand &MCO = MI.getOperand(OpIndex);
+ assert(MCO.isReg() && "New value consumers must be registers");
+ unsigned Register =
+ getContext().getRegisterInfo()->getEncodingValue(MCO.getReg());
+ if ((Register & 0x6) == 0)
+ // HexagonPRM 10.11 Bit 1-2 == 0 is reserved
+ return MCDisassembler::Fail;
+ unsigned Lookback = (Register & 0x6) >> 1;
+ unsigned Offset = 1;
+ bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI);
+ auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ auto i = Instructions.end() - 1;
+ for (auto n = Instructions.begin() - 1;; --i, ++Offset) {
+ if (i == n)
+ // Couldn't find producer
+ return MCDisassembler::Fail;
+ if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst()))
+ // Skip scalars when calculating distances for vectors
+ ++Lookback;
+ if (HexagonMCInstrInfo::isImmext(*i->getInst()))
+ ++Lookback;
+ if (Offset == Lookback)
+ break;
+ }
+ auto const &Inst = *i->getInst();
+ bool SubregBit = (Register & 0x1) != 0;
+ if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) {
+      // If the subreg bit is set, we select the second produced new value.
+ unsigned Producer =
+ HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg();
+ assert(Producer != Hexagon::NoRegister);
+ MCO.setReg(Producer);
+ } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) {
+ unsigned Producer =
+ HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg();
+ if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15)
+ Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0;
+ else if (SubregBit)
+ // Subreg bit should not be set for non-doublevector newvalue producers
+ return MCDisassembler::Fail;
+ assert(Producer != Hexagon::NoRegister);
+ MCO.setReg(Producer);
+ } else
+ return MCDisassembler::Fail;
}
+ adjustExtendedInstructions(MI, MCB);
+ MCInst const *Extender =
+ HexagonMCInstrInfo::extenderForIndex(MCB,
+ HexagonMCInstrInfo::bundleSize(MCB));
+  if (Extender != nullptr) {
+    MCInst const &Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ?
+                             *MI.getOperand(1).getInst() : MI;
+ if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) &&
+ !HexagonMCInstrInfo::isExtended(*MCII, Inst))
+ return MCDisassembler::Fail;
+ }
return Result;
}
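// Illustrative aside, not part of the patch: a minimal sketch of how the
// new-value operand encoding handled above is interpreted. Bits [2:1] of the
// consumer's register field give the look-back distance to the producer
// inside the bundle (0 is reserved), and bit [0] selects the second value of
// a double-vector producer. The names below are hypothetical.
struct NewValueBits {
  unsigned Lookback;  // how many instructions back the producer sits,
                      // before the immext/scalar adjustments made above
  bool SecondValue;   // take the odd half / second produced new value
};
static NewValueBits decodeNewValueBits(unsigned RegEncoding) {
  return {(RegEncoding & 0x6) >> 1, (RegEncoding & 0x1) != 0};
}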
+void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI,
+ MCInst const &MCB) const {
+ if (!HexagonMCInstrInfo::hasExtenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB))) {
+ unsigned opcode;
+    // This code is used by the disassembler to disambiguate between
+    // GP-relative and absolute addressing instructions, since they share
+    // the same encoding bits. However, an absolute addressing instruction
+    // must follow an immediate extender. The disassembler always selects
+    // the absolute addressing instruction first and uses this code to
+    // change it into the GP-relative instruction when no corresponding
+    // immediate extender is present.
+ switch (MCI.getOpcode()) {
+ case Hexagon::S2_storerbabs:
+ opcode = Hexagon::S2_storerbgp;
+ break;
+ case Hexagon::S2_storerhabs:
+ opcode = Hexagon::S2_storerhgp;
+ break;
+ case Hexagon::S2_storerfabs:
+ opcode = Hexagon::S2_storerfgp;
+ break;
+ case Hexagon::S2_storeriabs:
+ opcode = Hexagon::S2_storerigp;
+ break;
+ case Hexagon::S2_storerbnewabs:
+ opcode = Hexagon::S2_storerbnewgp;
+ break;
+ case Hexagon::S2_storerhnewabs:
+ opcode = Hexagon::S2_storerhnewgp;
+ break;
+ case Hexagon::S2_storerinewabs:
+ opcode = Hexagon::S2_storerinewgp;
+ break;
+ case Hexagon::S2_storerdabs:
+ opcode = Hexagon::S2_storerdgp;
+ break;
+ case Hexagon::L4_loadrb_abs:
+ opcode = Hexagon::L2_loadrbgp;
+ break;
+ case Hexagon::L4_loadrub_abs:
+ opcode = Hexagon::L2_loadrubgp;
+ break;
+ case Hexagon::L4_loadrh_abs:
+ opcode = Hexagon::L2_loadrhgp;
+ break;
+ case Hexagon::L4_loadruh_abs:
+ opcode = Hexagon::L2_loadruhgp;
+ break;
+ case Hexagon::L4_loadri_abs:
+ opcode = Hexagon::L2_loadrigp;
+ break;
+ case Hexagon::L4_loadrd_abs:
+ opcode = Hexagon::L2_loadrdgp;
+ break;
+ default:
+ opcode = MCI.getOpcode();
+ }
+ MCI.setOpcode(opcode);
+ }
+}
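// Illustrative aside, not part of the patch: the remapping above in
// isolation. Without a preceding A4_ext the absolute form cannot be the
// intended decoding, so e.g. "memw(##sym) = Rt" (S2_storeriabs) is reissued
// as "memw(gp + #sym) = Rt" (S2_storerigp). A minimal sketch for one pair:
static unsigned absToGPRel(unsigned Opcode, bool HasExtender) {
  if (!HasExtender && Opcode == Hexagon::S2_storeriabs)
    return Hexagon::S2_storerigp;
  return Opcode;
}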
+
+namespace llvm {
+extern const MCInstrDesc HexagonInsts[];
+}
+
+static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
+ ArrayRef<MCPhysReg> Table) {
+ if (RegNo < Table.size()) {
+ Inst.addOperand(MCOperand::createReg(Table[RegNo]));
+ return MCDisassembler::Success;
+ }
+
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
+static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ static const MCPhysReg IntRegDecoderTable[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9,
+ Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14,
+ Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24,
+ Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29,
+ Hexagon::R30, Hexagon::R31};
+
+ return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable);
+}
+
+static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecRegDecoderTable[] = {
+ Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4,
+ Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9,
+ Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14,
+ Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19,
+ Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24,
+ Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29,
+ Hexagon::V30, Hexagon::V31};
+
+ return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable);
+}
+
+static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg DoubleRegDecoderTable[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
+ Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7,
+ Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11,
+ Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15};
+
+ return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable);
+}
+
+static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecDblRegDecoderTable[] = {
+ Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3,
+ Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7,
+ Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11,
+ Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15};
+
+ return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable));
+}
+
+static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1,
+ Hexagon::P2, Hexagon::P3};
+
+ return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable);
+}
+
+static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1,
+ Hexagon::Q2, Hexagon::Q3};
+
+ return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable);
+}
+
+static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg CtrlRegDecoderTable[] = {
+ Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
+ Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7,
+ Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
+ Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC
+ };
+
+ if (RegNo >= array_lengthof(CtrlRegDecoderTable))
+ return MCDisassembler::Fail;
+
+ if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ static const MCPhysReg CtrlReg64DecoderTable[] = {
+ Hexagon::C1_0, Hexagon::NoRegister,
+ Hexagon::C3_2, Hexagon::NoRegister,
+ Hexagon::C7_6, Hexagon::NoRegister,
+ Hexagon::C9_8, Hexagon::NoRegister,
+ Hexagon::C11_10, Hexagon::NoRegister,
+ Hexagon::CS, Hexagon::NoRegister,
+ Hexagon::UPC, Hexagon::NoRegister
+ };
+
+ if (RegNo >= array_lengthof(CtrlReg64DecoderTable))
+ return MCDisassembler::Fail;
+
+ if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = CtrlReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ unsigned Register = 0;
+ switch (RegNo) {
+ case 0:
+ Register = Hexagon::M0;
+ break;
+ case 1:
+ Register = Hexagon::M1;
+ break;
+ default:
+ return MCDisassembler::Fail;
+ }
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+namespace {
+uint32_t fullValue(MCInstrInfo const &MCII,
+ MCInst &MCB,
+ MCInst &MI,
+ int64_t Value) {
+ MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB));
+  if (!Extender ||
+      MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
+ return Value;
+ unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f;
+ int64_t Bits;
+ bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits);
+ assert(Success);(void)Success;
+ uint32_t Upper26 = static_cast<uint32_t>(Bits);
+ uint32_t Operand = Upper26 | Lower6;
+ return Operand;
+}
+template <size_t T>
+void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, SignExtend64<T>(tmp));
+ int64_t Extended = SignExtend64<32>(FullValue);
+ HexagonMCInstrInfo::addConstant(MI, Extended,
+ Disassembler.getContext());
+}
+}
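// Illustrative aside, not part of the patch: fullValue() splices an extended
// immediate from two pieces -- the A4_ext payload, which already occupies
// bits [31:6], and the low six bits of the extendable operand after its
// alignment bits are dropped. A minimal sketch of that composition:
static uint32_t spliceExtended(uint32_t Upper26, uint32_t Low6) {
  // Upper26 sits in bits [31:6]; only six bits survive from the operand.
  return Upper26 | (Low6 & 0x3f);
}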
+
+static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, tmp);
+ assert(FullValue >= 0 && "Negative in unsigned decoder");
+ HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext());
+ return MCDisassembler::Success;
+}
+
static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<16>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<16>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<12>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<12>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<11>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<11>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<12>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+  HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp),
+                                  contextFromDecoder(Decoder));
return MCDisassembler::Success;
}
static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<13>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<13>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<14>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<14>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<10>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<10>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
const void *Decoder) {
- uint64_t imm = SignExtend64<8>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<8>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<6>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<6>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<4>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<4>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<5>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<5>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<6>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<6>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- uint64_t imm = SignExtend64<7>(tmp);
- MI.addOperand(MCOperand::createImm(imm));
+ signedDecoder<7>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
+static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<10>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder) {
+ signedDecoder<19>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+
+// custom decoder for various jump/call immediates
+static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+ const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI);
+ // r13_2 is not extendable, so if there are no extent bits, it's r13_2
+ if (Bits == 0)
+ Bits = 15;
+ uint32_t FullValue = fullValue(*Disassembler.MCII,
+ **Disassembler.CurrentBundle,
+ MI, SignExtend64(tmp, Bits));
+ int64_t Extended = SignExtend64<32>(FullValue) + Address;
+ if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true,
+ 0, 4))
+ HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
+ return MCDisassembler::Success;
+}
+
+// Addressing-mode-dependent load/store opcode map.
+//   - If an insn is preceded by an extender, the address is absolute.
+//     - memw(##symbol) = r0
+//   - If an insn is not preceded by an extender, the address is GP-relative.
+//     - memw(gp + #symbol) = r0
+// Note that the entries must be ordered by descending opcode encoding.
+// HexagonII::INST_ICLASS_ST
+static const unsigned int StoreConditionalOpcodeData[][2] = {
+ {S4_pstorerdfnew_abs, 0xafc02084},
+ {S4_pstorerdtnew_abs, 0xafc02080},
+ {S4_pstorerdf_abs, 0xafc00084},
+ {S4_pstorerdt_abs, 0xafc00080},
+ {S4_pstorerinewfnew_abs, 0xafa03084},
+ {S4_pstorerinewtnew_abs, 0xafa03080},
+ {S4_pstorerhnewfnew_abs, 0xafa02884},
+ {S4_pstorerhnewtnew_abs, 0xafa02880},
+ {S4_pstorerbnewfnew_abs, 0xafa02084},
+ {S4_pstorerbnewtnew_abs, 0xafa02080},
+ {S4_pstorerinewf_abs, 0xafa01084},
+ {S4_pstorerinewt_abs, 0xafa01080},
+ {S4_pstorerhnewf_abs, 0xafa00884},
+ {S4_pstorerhnewt_abs, 0xafa00880},
+ {S4_pstorerbnewf_abs, 0xafa00084},
+ {S4_pstorerbnewt_abs, 0xafa00080},
+ {S4_pstorerifnew_abs, 0xaf802084},
+ {S4_pstoreritnew_abs, 0xaf802080},
+ {S4_pstorerif_abs, 0xaf800084},
+ {S4_pstorerit_abs, 0xaf800080},
+ {S4_pstorerhfnew_abs, 0xaf402084},
+ {S4_pstorerhtnew_abs, 0xaf402080},
+ {S4_pstorerhf_abs, 0xaf400084},
+ {S4_pstorerht_abs, 0xaf400080},
+ {S4_pstorerbfnew_abs, 0xaf002084},
+ {S4_pstorerbtnew_abs, 0xaf002080},
+ {S4_pstorerbf_abs, 0xaf000084},
+ {S4_pstorerbt_abs, 0xaf000080}};
+// HexagonII::INST_ICLASS_LD
+
+// HexagonII::INST_ICLASS_LD_ST_2
+static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000},
+ {L4_loadri_abs, 0x49800000},
+ {L4_loadruh_abs, 0x49600000},
+ {L4_loadrh_abs, 0x49400000},
+ {L4_loadrub_abs, 0x49200000},
+ {L4_loadrb_abs, 0x49000000},
+ {S2_storerdabs, 0x48c00000},
+ {S2_storerinewabs, 0x48a01000},
+ {S2_storerhnewabs, 0x48a00800},
+ {S2_storerbnewabs, 0x48a00000},
+ {S2_storeriabs, 0x48800000},
+ {S2_storerfabs, 0x48600000},
+ {S2_storerhabs, 0x48400000},
+ {S2_storerbabs, 0x48000000}};
+static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData);
+static const size_t NumLS = array_lengthof(LoadStoreOpcodeData);
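// Illustrative aside, not part of the patch: each table row pairs an LLVM
// opcode with the encoding bits that must all be set for it to match;
// decodeSpecial() below takes the first hit, so the descending ordering
// matters. A minimal sketch of the same lookup:
static unsigned matchRow(uint32_t insn, const unsigned (*Rows)[2], size_t N) {
  for (size_t i = 0; i < N; ++i)
    if ((insn & Rows[i][1]) == Rows[i][1])
      return Rows[i][0]; // LLVM opcode of the first matching pattern
  return 0;              // no match
}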
+
+static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
+
+ unsigned MachineOpcode = 0;
+ unsigned LLVMOpcode = 0;
+
+ if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) {
+ for (size_t i = 0; i < NumCondS; ++i) {
+ if ((insn & StoreConditionalOpcodeData[i][1]) ==
+ StoreConditionalOpcodeData[i][1]) {
+ MachineOpcode = StoreConditionalOpcodeData[i][1];
+ LLVMOpcode = StoreConditionalOpcodeData[i][0];
+ break;
+ }
+ }
+ }
+ if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) {
+ for (size_t i = 0; i < NumLS; ++i) {
+ if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) {
+ MachineOpcode = LoadStoreOpcodeData[i][1];
+ LLVMOpcode = LoadStoreOpcodeData[i][0];
+ break;
+ }
+ }
+ }
+
+ if (MachineOpcode) {
+ unsigned Value = 0;
+ unsigned shift = 0;
+ MI.setOpcode(LLVMOpcode);
+ // Remove the parse bits from the insn.
+ insn &= ~HexagonII::INST_PARSE_MASK;
+
+ switch (LLVMOpcode) {
+ default:
+ return MCDisassembler::Fail;
+ break;
+
+ case Hexagon::S4_pstorerdf_abs:
+ case Hexagon::S4_pstorerdt_abs:
+ case Hexagon::S4_pstorerdfnew_abs:
+ case Hexagon::S4_pstorerdtnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Rtt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::S4_pstorerbnewf_abs:
+ case Hexagon::S4_pstorerbnewt_abs:
+ case Hexagon::S4_pstorerbnewfnew_abs:
+ case Hexagon::S4_pstorerbnewtnew_abs:
+ case Hexagon::S4_pstorerhnewf_abs:
+ case Hexagon::S4_pstorerhnewt_abs:
+ case Hexagon::S4_pstorerhnewfnew_abs:
+ case Hexagon::S4_pstorerhnewtnew_abs:
+ case Hexagon::S4_pstorerinewf_abs:
+ case Hexagon::S4_pstorerinewt_abs:
+ case Hexagon::S4_pstorerinewfnew_abs:
+ case Hexagon::S4_pstorerinewtnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Nt
+ Value = (insn >> 8) & UINT64_C(7);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::S4_pstorerbf_abs:
+ case Hexagon::S4_pstorerbt_abs:
+ case Hexagon::S4_pstorerbfnew_abs:
+ case Hexagon::S4_pstorerbtnew_abs:
+ case Hexagon::S4_pstorerhf_abs:
+ case Hexagon::S4_pstorerht_abs:
+ case Hexagon::S4_pstorerhfnew_abs:
+ case Hexagon::S4_pstorerhtnew_abs:
+ case Hexagon::S4_pstorerif_abs:
+ case Hexagon::S4_pstorerit_abs:
+ case Hexagon::S4_pstorerifnew_abs:
+ case Hexagon::S4_pstoreritnew_abs: {
+ // op: Pv
+ Value = insn & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 12) & UINT64_C(48);
+ Value |= (insn >> 3) & UINT64_C(15);
+ MI.addOperand(MCOperand::createImm(Value));
+ // op: Rt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ case Hexagon::L4_ploadrdf_abs:
+ case Hexagon::L4_ploadrdt_abs:
+ case Hexagon::L4_ploadrdfnew_abs:
+ case Hexagon::L4_ploadrdtnew_abs: {
+ // op: Rdd
+ Value = insn & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ // op: Pt
+ Value = ((insn >> 9) & UINT64_C(3));
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = ((insn >> 15) & UINT64_C(62));
+ Value |= ((insn >> 8) & UINT64_C(1));
+ MI.addOperand(MCOperand::createImm(Value));
+ break;
+ }
+
+ case Hexagon::L4_ploadrbf_abs:
+ case Hexagon::L4_ploadrbt_abs:
+ case Hexagon::L4_ploadrbfnew_abs:
+ case Hexagon::L4_ploadrbtnew_abs:
+ case Hexagon::L4_ploadrhf_abs:
+ case Hexagon::L4_ploadrht_abs:
+ case Hexagon::L4_ploadrhfnew_abs:
+ case Hexagon::L4_ploadrhtnew_abs:
+ case Hexagon::L4_ploadrubf_abs:
+ case Hexagon::L4_ploadrubt_abs:
+ case Hexagon::L4_ploadrubfnew_abs:
+ case Hexagon::L4_ploadrubtnew_abs:
+ case Hexagon::L4_ploadruhf_abs:
+ case Hexagon::L4_ploadruht_abs:
+ case Hexagon::L4_ploadruhfnew_abs:
+ case Hexagon::L4_ploadruhtnew_abs:
+ case Hexagon::L4_ploadrif_abs:
+ case Hexagon::L4_ploadrit_abs:
+ case Hexagon::L4_ploadrifnew_abs:
+ case Hexagon::L4_ploadritnew_abs:
+ // op: Rd
+ Value = insn & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ // op: Pt
+ Value = (insn >> 9) & UINT64_C(3);
+ DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ // op: u6
+ Value = (insn >> 15) & UINT64_C(62);
+ Value |= (insn >> 8) & UINT64_C(1);
+ MI.addOperand(MCOperand::createImm(Value));
+ break;
+
+ // op: g16_2
+ case (Hexagon::L4_loadri_abs):
+ ++shift;
+ // op: g16_1
+ case Hexagon::L4_loadrh_abs:
+ case Hexagon::L4_loadruh_abs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::L4_loadrb_abs:
+ case Hexagon::L4_loadrub_abs: {
+ // op: Rd
+ Value |= insn & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(511);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ break;
+ }
+
+ case Hexagon::L4_loadrd_abs: {
+ Value = insn & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(511);
+ MI.addOperand(MCOperand::createImm(Value << 3));
+ break;
+ }
+
+ case Hexagon::S2_storerdabs: {
+ // op: g16_3
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << 3));
+ // op: Rtt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ // op: g16_2
+ case Hexagon::S2_storerinewabs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::S2_storerhnewabs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::S2_storerbnewabs: {
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ // op: Nt
+ Value = (insn >> 8) & UINT64_C(7);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+
+ // op: g16_2
+ case Hexagon::S2_storeriabs:
+ ++shift;
+ // op: g16_1
+ case Hexagon::S2_storerhabs:
+ case Hexagon::S2_storerfabs:
+ ++shift;
+ // op: g16_0
+ case Hexagon::S2_storerbabs: {
+ Value = (insn >> 11) & UINT64_C(49152);
+ Value |= (insn >> 7) & UINT64_C(15872);
+ Value |= (insn >> 5) & UINT64_C(256);
+ Value |= insn & UINT64_C(255);
+ MI.addOperand(MCOperand::createImm(Value << shift));
+ // op: Rt
+ Value = (insn >> 8) & UINT64_C(31);
+ DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ break;
+ }
+ }
+ return MCDisassembler::Success;
+ }
+ return MCDisassembler::Fail;
+}
+
+static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
+ void const *Decoder) {
+
+  // Instruction class for a constant extender: bits 31:28 == 0b0000.
+ if ((~insn & 0xf0000000) == 0xf0000000) {
+ unsigned Value;
+ // 27:16 High 12 bits of 26-bit extender.
+ Value = (insn & 0x0fff0000) << 4;
+ // 13:0 Low 14 bits of 26-bit extender.
+ Value |= ((insn & 0x3fff) << 6);
+ MI.setOpcode(Hexagon::A4_ext);
+ HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder));
+ return MCDisassembler::Success;
+ }
+ return MCDisassembler::Fail;
+}
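// Illustrative aside, not part of the patch, with a made-up encoding to show
// the payload placement in decodeImmext() above: for insn = 0x00ab1234
// (iclass bits 31:28 are zero), the high 12 bits give
// (0x00ab1234 & 0x0fff0000) << 4 == 0x0ab00000 and the low 14 bits give
// (0x00ab1234 & 0x3fff) << 6 == 0x00048d00, so the stored constant is
// 0x0ab48d00 -- bits [31:6] of the eventual extended immediate.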
+
// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
enum subInstBinaryValues {
V4_SA1_addi_BITS = 0x0000,
@@ -731,6 +1342,8 @@ static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) {
return Hexagon::R0 + encoded_reg;
else if (encoded_reg < 16)
return Hexagon::R0 + encoded_reg + 8;
+
+ // patently false value
return Hexagon::NoRegister;
}
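// Illustrative aside, not part of the patch: the subinstruction field above
// encodes R0-R7 directly and maps values 8-15 onto R16-R23, so an encoded
// value of 10 decodes to R18; anything >= 16 falls through to NoRegister.
// e.g.: assert(getRegFromSubinstEncoding(10) == Hexagon::R18);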
@@ -739,10 +1352,13 @@ static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) {
return Hexagon::D0 + encoded_dreg;
else if (encoded_dreg < 8)
return Hexagon::D0 + encoded_dreg + 4;
+
+ // patently false value
return Hexagon::NoRegister;
}
-static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
+void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
+ unsigned inst) const {
int64_t operand;
MCOperand Op;
switch (opcode) {
@@ -762,8 +1378,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
case Hexagon::V4_SS2_allocframe:
// u 8-4{5_3}
operand = ((inst & 0x1f0) >> 4) << 3;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL1_loadri_io:
// Rd 3-0, Rs 7-4, u 11-8{4_2}
@@ -774,8 +1389,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf00) >> 6;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL1_loadrub_io:
// Rd 3-0, Rs 7-4, u 11-8
@@ -786,8 +1400,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf00) >> 8;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadrb_io:
// Rd 3-0, Rs 7-4, u 10-8
@@ -798,8 +1411,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0x700) >> 8;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadrh_io:
case Hexagon::V4_SL2_loadruh_io:
@@ -811,8 +1423,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x700) >> 8) << 1;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadrd_sp:
// Rdd 2-0, u 7-3{5_3}
@@ -820,8 +1431,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x0f8) >> 3) << 3;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SL2_loadri_sp:
// Rd 3-0, u 8-4{5_2}
@@ -829,8 +1439,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x1f0) >> 4) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_addi:
// Rx 3-0 (x2), s7 10-4
@@ -839,8 +1448,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
MI->addOperand(Op);
MI->addOperand(Op);
operand = SignExtend64<7>((inst & 0x7f0) >> 4);
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_addrx:
// Rx 3-0 (x2), Rs 7-4
@@ -873,8 +1481,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x3f0) >> 4) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_seti:
// Rd 3-0, u 9-4
@@ -882,8 +1489,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0x3f0) >> 4;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_clrf:
case Hexagon::V4_SA1_clrfnew:
@@ -901,8 +1507,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = inst & 0x3;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_combine0i:
case Hexagon::V4_SA1_combine1i:
@@ -913,8 +1518,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0x060) >> 5;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SA1_combinerz:
case Hexagon::V4_SA1_combinezr:
@@ -932,8 +1536,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf00) >> 8;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
@@ -944,8 +1547,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0xf00) >> 8) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
@@ -957,8 +1559,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = inst & 0xf;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SS2_storewi0:
case Hexagon::V4_SS2_storewi1:
@@ -967,25 +1568,23 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = (inst & 0xf) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
case Hexagon::V4_SS2_stored_sp:
// s 8-3{6_3}, Rtt 2-0
operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getDRegFromSubinstEncoding(inst & 0x7);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
+ break;
case Hexagon::V4_SS2_storeh_io:
// Rs 7-4, u 10-8{3_1}, Rt 3-0
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
operand = ((inst & 0x700) >> 8) << 1;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
@@ -993,8 +1592,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) {
case Hexagon::V4_SS2_storew_sp:
// u 8-4{5_2}, Rd 3-0
operand = ((inst & 0x1f0) >> 4) << 2;
- Op = MCOperand::createImm(operand);
- MI->addOperand(Op);
+ HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.h b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
index d360be2..ed7d957 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.h
@@ -47,15 +47,8 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class MachineInstr;
- class MCInst;
- class MCInstrInfo;
- class HexagonAsmPrinter;
class HexagonTargetMachine;
- void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
- HexagonAsmPrinter &AP);
-
/// \brief Creates a Hexagon-specific Target Transformation Info pass.
ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
} // end namespace llvm;
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index 53a687c..1189cfd 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -24,14 +24,32 @@ include "llvm/Target/Target.td"
// Hexagon Architectures
def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
+def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">;
+def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">;
+
+// Hexagon ISA Extensions
+def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps",
+ "true", "Hexagon HVX instructions">;
+def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps",
+ "true", "Hexagon HVX Double instructions">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV5T : Predicate<"HST->hasV5TOps()">;
-def NoV5T : Predicate<"!HST->hasV5TOps()">;
-def UseMEMOP : Predicate<"HST->useMemOps()">;
-def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
+def HasV5T : Predicate<"HST->hasV5TOps()">;
+def NoV5T : Predicate<"!HST->hasV5TOps()">;
+def HasV55T : Predicate<"HST->hasV55TOps()">,
+ AssemblerPredicate<"ArchV55">;
+def HasV60T : Predicate<"HST->hasV60TOps()">,
+ AssemblerPredicate<"ArchV60">;
+def UseMEMOP : Predicate<"HST->useMemOps()">;
+def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
+def UseHVXDbl : Predicate<"HST->useHVXDblOps()">,
+ AssemblerPredicate<"ExtensionHVXDbl">;
+def UseHVXSgl : Predicate<"HST->useHVXSglOps()">;
+
+def UseHVX          : Predicate<"HST->useHVXSglOps() || HST->useHVXDblOps()">,
+ AssemblerPredicate<"ExtensionHVX">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -53,6 +71,7 @@ class NewValueRel: PredNewRel;
// NewValueRel - Filter class used to relate load/store instructions having
// different addressing modes with each other.
class AddrModeRel: NewValueRel;
+class IntrinsicsRel;
//===----------------------------------------------------------------------===//
// Generate mapping table to relate non-predicate instructions with their
@@ -62,7 +81,7 @@ class AddrModeRel: NewValueRel;
def getPredOpcode : InstrMapping {
let FilterClass = "PredRel";
// Instructions with the same BaseOpcode and isNVStore values form a row.
- let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"];
// Instructions with the same predicate sense form a column.
let ColFields = ["PredSense"];
// The key column is the unpredicated instructions.
@@ -77,7 +96,7 @@ def getPredOpcode : InstrMapping {
//
def getFalsePredOpcode : InstrMapping {
let FilterClass = "PredRel";
- let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"];
let ColFields = ["PredSense"];
let KeyCol = ["true"];
let ValueCols = [["false"]];
@@ -89,7 +108,7 @@ def getFalsePredOpcode : InstrMapping {
//
def getTruePredOpcode : InstrMapping {
let FilterClass = "PredRel";
- let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"];
+ let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"];
let ColFields = ["PredSense"];
let KeyCol = ["false"];
let ValueCols = [["true"]];
@@ -125,7 +144,7 @@ def getPredOldOpcode : InstrMapping {
//
def getNewValueOpcode : InstrMapping {
let FilterClass = "NewValueRel";
- let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"];
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"];
let ColFields = ["NValueST"];
let KeyCol = ["false"];
let ValueCols = [["true"]];
@@ -137,16 +156,16 @@ def getNewValueOpcode : InstrMapping {
//
def getNonNVStore : InstrMapping {
let FilterClass = "NewValueRel";
- let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"];
+ let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"];
let ColFields = ["NValueST"];
let KeyCol = ["true"];
let ValueCols = [["false"]];
}
-def getBasedWithImmOffset : InstrMapping {
+def getBaseWithImmOffset : InstrMapping {
let FilterClass = "AddrModeRel";
let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore",
- "isMEMri", "isFloat"];
+ "isFloat"];
let ColFields = ["addrMode"];
let KeyCol = ["Absolute"];
let ValueCols = [["BaseImmOffset"]];
@@ -168,6 +187,37 @@ def getRegForm : InstrMapping {
let ValueCols = [["reg"]];
}
+def getRegShlForm : InstrMapping {
+ let FilterClass = "ImmRegShl";
+ let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+ let ColFields = ["InputType"];
+ let KeyCol = ["imm"];
+ let ValueCols = [["reg"]];
+}
+
+def notTakenBranchPrediction : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"];
+ let ColFields = ["isBrTaken"];
+ let KeyCol = ["true"];
+ let ValueCols = [["false"]];
+}
+
+def takenBranchPrediction : InstrMapping {
+ let FilterClass = "PredRel";
+ let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"];
+ let ColFields = ["isBrTaken"];
+ let KeyCol = ["false"];
+ let ValueCols = [["true"]];
+}
+
+def getRealHWInstr : InstrMapping {
+ let FilterClass = "IntrinsicsRel";
+ let RowFields = ["BaseOpcode"];
+ let ColFields = ["InstrType"];
+ let KeyCol = ["Pseudo"];
+ let ValueCols = [["Pseudo"], ["Real"]];
+}
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -192,12 +242,22 @@ def : Proc<"hexagonv4", HexagonModelV4,
[ArchV4]>;
def : Proc<"hexagonv5", HexagonModelV4,
[ArchV4, ArchV5]>;
+def : Proc<"hexagonv55", HexagonModelV55,
+ [ArchV4, ArchV5, ArchV55]>;
+def : Proc<"hexagonv60", HexagonModelV60,
+ [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def HexagonAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string TokenizingCharacters = "#()=:.<>!+*";
+}
+
def Hexagon : Target {
// Pull in Instruction Info:
let InstructionSet = HexagonInstrInfo;
+ let AssemblyParserVariants = [HexagonAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 05728d2..e213089 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -40,11 +40,13 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
@@ -56,12 +58,27 @@
using namespace llvm;
+namespace llvm {
+ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
#define DEBUG_TYPE "asm-printer"
static cl::opt<bool> AlignCalls(
"hexagon-align-calls", cl::Hidden, cl::init(true),
cl::desc("Insert falign after call instruction for Hexagon target"));
+// Given a scalar register return its pair.
+inline static unsigned getHexagonRegisterPair(unsigned Reg,
+ const MCRegisterInfo *RI) {
+ assert(Hexagon::IntRegsRegClass.contains(Reg));
+ MCSuperRegIterator SR(Reg, RI, false);
+ unsigned Pair = *SR;
+ assert(Hexagon::DoubleRegsRegClass.contains(Pair));
+ return Pair;
+}
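// Illustrative aside, not part of the patch: IntRegs only ever sit inside
// DoubleRegs pairs, so e.g. getHexagonRegisterPair(Hexagon::R2, RI) and
// getHexagonRegisterPair(Hexagon::R3, RI) both return Hexagon::D1 (r3:2);
// the asserts above guard against anything outside that pattern.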
+
HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
@@ -102,9 +119,8 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
//
bool HexagonAsmPrinter::
isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
- if (MBB->hasAddressTaken()) {
+ if (MBB->hasAddressTaken())
return false;
- }
return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
}
@@ -117,7 +133,8 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &OS) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
switch (ExtraCode[0]) {
default:
@@ -173,45 +190,407 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
+ MCStreamer &OutStreamer,
+ const MCOperand &Imm, int AlignSize) {
+ MCSymbol *Sym;
+ int64_t Value;
+ if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
+ StringRef sectionPrefix;
+ std::string ImmString;
+ StringRef Name;
+ if (AlignSize == 8) {
+ Name = ".CONST_0000000000000000";
+ sectionPrefix = ".gnu.linkonce.l8";
+ ImmString = utohexstr(Value);
+ } else {
+ Name = ".CONST_00000000";
+ sectionPrefix = ".gnu.linkonce.l4";
+ ImmString = utohexstr(static_cast<uint32_t>(Value));
+ }
+
+ std::string symbolName = // Yes, leading zeros are kept.
+ Name.drop_back(ImmString.size()).str() + ImmString;
+ std::string sectionName = sectionPrefix.str() + symbolName;
+
+ MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+ sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ OutStreamer.SwitchSection(Section);
+
+ Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName));
+ if (Sym->isUndefined()) {
+ OutStreamer.EmitLabel(Sym);
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+ OutStreamer.EmitIntValue(Value, AlignSize);
+ OutStreamer.EmitCodeAlignment(AlignSize);
+ }
+ } else {
+ assert(Imm.isExpr() && "Expected expression and found none");
+ const MachineOperand &MO = MI.getOperand(1);
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = AP.getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = AP.GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = AP.GetJTISymbol(MO.getIndex());
+ else
+ llvm_unreachable("Unknown operand type!");
+
+ StringRef SymbolName = MOSymbol->getName();
+ std::string LitaName = ".CONST_" + SymbolName.str();
+
+ MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+ ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+
+ OutStreamer.SwitchSection(Section);
+ Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName));
+ if (Sym->isUndefined()) {
+ OutStreamer.EmitLabel(Sym);
+ OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local);
+ OutStreamer.EmitValue(Imm.getExpr(), AlignSize);
+ OutStreamer.EmitCodeAlignment(AlignSize);
+ }
+ }
+ return Sym;
+}
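// Illustrative aside, not part of the patch: for an absolute value of 0x2a
// with AlignSize == 4, the code above emits the symbol ".CONST_0000002A"
// (assuming utohexstr's default upper-case digits) into section
// ".gnu.linkonce.l4.CONST_0000002A"; 8-byte constants use the longer
// ".CONST_0000000000000000"-style name and the ".gnu.linkonce.l8" prefix.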
+
+void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
+ const MachineInstr &MI) {
+ MCInst &MappedInst = static_cast <MCInst &>(Inst);
+ const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
+
+ switch (Inst.getOpcode()) {
+ default: return;
+
+ // "$dst = CONST64(#$src1)",
+ case Hexagon::CONST64_Float_Real:
+ case Hexagon::CONST64_Int_Real:
+ if (!OutStreamer->hasRawTextSupport()) {
+ const MCOperand &Imm = MappedInst.getOperand(1);
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+
+ MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8);
+
+ OutStreamer->SwitchSection(Current.first, Current.second);
+ MCInst TmpInst;
+ MCOperand &Reg = MappedInst.getOperand(0);
+ TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+ TmpInst.addOperand(Reg);
+ TmpInst.addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(Sym, OutContext)));
+ MappedInst = TmpInst;
+
+ }
+ break;
+ case Hexagon::CONST32:
+ case Hexagon::CONST32_Float_Real:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::FCONST32_nsdata:
+ if (!OutStreamer->hasRawTextSupport()) {
+ MCOperand &Imm = MappedInst.getOperand(1);
+ MCSectionSubPair Current = OutStreamer->getCurrentSection();
+ MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4);
+ OutStreamer->SwitchSection(Current.first, Current.second);
+ MCInst TmpInst;
+ MCOperand &Reg = MappedInst.getOperand(0);
+ TmpInst.setOpcode(Hexagon::L2_loadrigp);
+ TmpInst.addOperand(Reg);
+ TmpInst.addOperand(MCOperand::createExpr(
+ MCSymbolRefExpr::create(Sym, OutContext)));
+ MappedInst = TmpInst;
+ }
+ break;
+
+  // C2_pxfer_map maps to the C2_or instruction. Although it is possible to
+  // use C2_or during instruction selection itself, doing so results in
+  // suboptimal code.
+ case Hexagon::C2_pxfer_map: {
+ MCOperand &Ps = Inst.getOperand(1);
+ MappedInst.setOpcode(Hexagon::C2_or);
+ MappedInst.addOperand(Ps);
+ return;
+ }
+
+  // Vector reduce complex multiply by scalar: Rt & 1 maps to :hi, else :lo.
+  // The insn is mapped from the 4-operand form to the 3-operand raw form
+  // taking 3 register pairs.
+ case Hexagon::M2_vrcmpys_acc_s1: {
+ MCOperand &Rt = Inst.getOperand(3);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+ case Hexagon::M2_vrcmpys_s1: {
+ MCOperand &Rt = Inst.getOperand(2);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+
+ case Hexagon::M2_vrcmpys_s1rp: {
+ MCOperand &Rt = Inst.getOperand(2);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h);
+ else
+ MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+
+ case Hexagon::A4_boundscheck: {
+ MCOperand &Rs = Inst.getOperand(1);
+ assert (Rs.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rs.getReg());
+ if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2
+ MappedInst.setOpcode(Hexagon::A4_boundscheck_hi);
+ else // raw:lo
+ MappedInst.setOpcode(Hexagon::A4_boundscheck_lo);
+ Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI));
+ return;
+ }
+ case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ int64_t Imm;
+ MCExpr const *Expr = MO.getExpr();
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert (Success && "Expected immediate and none was found");(void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::S2_vsathub);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ MappedInst = TmpInst;
+ return;
+ }
+ TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+ case Hexagon::S5_vasrhrnd_goodsyntax:
+ case Hexagon::S2_asr_i_p_rnd_goodsyntax: {
+ MCOperand &MO2 = MappedInst.getOperand(2);
+ MCExpr const *Expr = MO2.getExpr();
+ int64_t Imm;
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert (Success && "Expected immediate and none was found");(void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::A2_combinew);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ MCOperand &MO1 = MappedInst.getOperand(1);
+ unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg);
+ // Add a new operand for the second register in the pair.
+ TmpInst.addOperand(MCOperand::createReg(High));
+ TmpInst.addOperand(MCOperand::createReg(Low));
+ MappedInst = TmpInst;
+ return;
+ }
+
+ if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax)
+ TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd);
+ else
+ TmpInst.setOpcode(Hexagon::S5_vasrhrnd);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
+  // If "#u5==0", the assembler maps this to "Rd=Rs";
+  // otherwise to "Rd=asr(Rs,#u5-1):rnd".
+ case Hexagon::S2_asr_i_r_rnd_goodsyntax: {
+ MCOperand &MO = Inst.getOperand(2);
+ MCExpr const *Expr = MO.getExpr();
+ int64_t Imm;
+ bool Success = Expr->evaluateAsAbsolute(Imm);
+ assert (Success && "Expected immediate and none was found");(void)Success;
+ MCInst TmpInst;
+ if (Imm == 0) {
+ TmpInst.setOpcode(Hexagon::A2_tfr);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ MappedInst = TmpInst;
+ return;
+ }
+ TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd);
+ TmpInst.addOperand(MappedInst.getOperand(0));
+ TmpInst.addOperand(MappedInst.getOperand(1));
+ const MCExpr *One = MCConstantExpr::create(1, OutContext);
+ const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Sub));
+ MappedInst = TmpInst;
+ return;
+ }
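  // Illustrative aside, not part of the patch: the *_goodsyntax cases above
  // all follow the same shape -- an immediate of 0 degenerates to a plain
  // copy or saturate, and any other value is re-emitted as the underlying
  // instruction with (#imm - 1), built as an MCBinaryExpr subtraction of a
  // constant 1.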
+ case Hexagon::TFRI_f:
+ MappedInst.setOpcode(Hexagon::A2_tfrsi);
+ return;
+ case Hexagon::TFRI_cPt_f:
+ MappedInst.setOpcode(Hexagon::C2_cmoveit);
+ return;
+ case Hexagon::TFRI_cNotPt_f:
+ MappedInst.setOpcode(Hexagon::C2_cmoveif);
+ return;
+ case Hexagon::MUX_ri_f:
+ MappedInst.setOpcode(Hexagon::C2_muxri);
+ return;
+ case Hexagon::MUX_ir_f:
+ MappedInst.setOpcode(Hexagon::C2_muxir);
+ return;
+
+ // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)"
+ case Hexagon::A2_tfrpi: {
+ MCInst TmpInst;
+ MCOperand &Rdd = MappedInst.getOperand(0);
+ MCOperand &MO = MappedInst.getOperand(1);
+
+ TmpInst.setOpcode(Hexagon::A2_combineii);
+ TmpInst.addOperand(Rdd);
+ int64_t Imm;
+ bool Success = MO.getExpr()->evaluateAsAbsolute(Imm);
+ if (Success && Imm < 0) {
+ const MCExpr *MOne = MCConstantExpr::create(-1, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(MOne));
+ } else {
+ const MCExpr *Zero = MCConstantExpr::create(0, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(Zero));
+ }
+ TmpInst.addOperand(MO);
+ MappedInst = TmpInst;
+ return;
+ }
+ // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
+ case Hexagon::A2_tfrp: {
+ MCOperand &MO = MappedInst.getOperand(1);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode(Hexagon::A2_combinew);
+ return;
+ }
+
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
+ ? Hexagon::C2_ccombinewt
+ : Hexagon::C2_ccombinewf);
+ return;
+ }
+ case Hexagon::A2_tfrptnew:
+ case Hexagon::A2_tfrpfnew: {
+ MCOperand &MO = MappedInst.getOperand(2);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ MO.setReg(High);
+ // Add a new operand for the second register in the pair.
+ MappedInst.addOperand(MCOperand::createReg(Low));
+ MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
+ ? Hexagon::C2_ccombinewnewt
+ : Hexagon::C2_ccombinewnewf);
+ return;
+ }
+
+ case Hexagon::M2_mpysmi: {
+ MCOperand &Imm = MappedInst.getOperand(2);
+ MCExpr const *Expr = Imm.getExpr();
+ int64_t Value;
+ bool Success = Expr->evaluateAsAbsolute(Value);
+ assert(Success);(void)Success;
+ if (Value < 0 && Value > -256) {
+ MappedInst.setOpcode(Hexagon::M2_mpysin);
+ Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext));
+ }
+ else
+ MappedInst.setOpcode(Hexagon::M2_mpysip);
+ return;
+ }
+
+ case Hexagon::A2_addsp: {
+ MCOperand &Rt = Inst.getOperand(1);
+ assert (Rt.isReg() && "Expected register and none was found");
+ unsigned Reg = RI->getEncodingValue(Rt.getReg());
+ if (Reg & 1)
+ MappedInst.setOpcode(Hexagon::A2_addsph);
+ else
+ MappedInst.setOpcode(Hexagon::A2_addspl);
+ Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
+ return;
+ }
+ case Hexagon::HEXAGON_V6_vd0_pseudo:
+ case Hexagon::HEXAGON_V6_vd0_pseudo_128B: {
+ MCInst TmpInst;
+ assert (Inst.getOperand(0).isReg() &&
+ "Expected register and none was found");
+
+ TmpInst.setOpcode(Hexagon::V6_vxor);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ MappedInst = TmpInst;
+ return;
+ }
+
+ }
+}
+
/// EmitInstruction -- Print out a single Hexagon MI to the current output
/// stream.
///
void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- MCInst MCB;
- MCB.setOpcode(Hexagon::BUNDLE);
- MCB.addOperand(MCOperand::createImm(0));
+ MCInst MCB = HexagonMCInstrInfo::createBundle();
+ const MCInstrInfo &MCII = *Subtarget->getInstrInfo();
if (MI->isBundle()) {
const MachineBasicBlock* MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator MII = MI;
+ MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
unsigned IgnoreCount = 0;
- for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) {
+ for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
MII->getOpcode() == TargetOpcode::IMPLICIT_DEF)
++IgnoreCount;
- else {
- HexagonLowerToMC(MII, MCB, *this);
- }
- }
+ else
+ HexagonLowerToMC(MCII, &*MII, MCB, *this);
}
- else {
- HexagonLowerToMC(MI, MCB, *this);
- HexagonMCInstrInfo::padEndloop(MCB);
- }
- // Examine the packet and try to find instructions that can be converted
- // to compounds.
- HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(),
- OutStreamer->getContext(), MCB);
- // Examine the packet and convert pairs of instructions to duplex
- // instructions when possible.
- SmallVector<DuplexCandidate, 8> possibleDuplexes;
- possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(
- *Subtarget->getInstrInfo(), MCB);
- HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget,
- OutStreamer->getContext(), MCB, possibleDuplexes);
- EmitToStreamer(*OutStreamer, MCB);
+ else
+ HexagonLowerToMC(MCII, MI, MCB, *this);
+
+ bool Ok = HexagonMCInstrInfo::canonicalizePacket(
+ MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr);
+ assert(Ok);
+ (void)Ok;
+  if (HexagonMCInstrInfo::bundleSize(MCB) == 0)
+ return;
+ OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
}
extern "C" void LLVMInitializeHexagonAsmPrinter() {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index 792fc8b..a78d97e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -42,6 +42,10 @@ namespace llvm {
void EmitInstruction(const MachineInstr *MI) override;
+ void HexagonProcessInstruction(MCInst &Inst,
+ const MachineInstr &MBB);
+
+
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
new file mode 100644
index 0000000..77907b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -0,0 +1,2778 @@
+//===--- HexagonBitSimplify.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexbit"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
+ FunctionPass *createHexagonBitSimplify();
+}
+
+namespace {
+ // Set of virtual registers, based on BitVector.
+ struct RegisterSet : private BitVector {
+ RegisterSet() : BitVector() {}
+ explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+ RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+
+ using BitVector::clear;
+ using BitVector::count;
+
+ unsigned find_first() const {
+ int First = BitVector::find_first();
+ if (First < 0)
+ return 0;
+ return x2v(First);
+ }
+
+ unsigned find_next(unsigned Prev) const {
+ int Next = BitVector::find_next(v2x(Prev));
+ if (Next < 0)
+ return 0;
+ return x2v(Next);
+ }
+
+ RegisterSet &insert(unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return static_cast<RegisterSet&>(BitVector::set(Idx));
+ }
+ RegisterSet &remove(unsigned R) {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return *this;
+ return static_cast<RegisterSet&>(BitVector::reset(Idx));
+ }
+
+ RegisterSet &insert(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+ }
+ RegisterSet &remove(const RegisterSet &Rs) {
+ return static_cast<RegisterSet&>(BitVector::reset(Rs));
+ }
+
+ reference operator[](unsigned R) {
+ unsigned Idx = v2x(R);
+ ensure(Idx);
+ return BitVector::operator[](Idx);
+ }
+ bool operator[](unsigned R) const {
+ unsigned Idx = v2x(R);
+ assert(Idx < size());
+ return BitVector::operator[](Idx);
+ }
+ bool has(unsigned R) const {
+ unsigned Idx = v2x(R);
+ if (Idx >= size())
+ return false;
+ return BitVector::test(Idx);
+ }
+
+ bool empty() const {
+ return !BitVector::any();
+ }
+ bool includes(const RegisterSet &Rs) const {
+ // A.BitVector::test(B) <=> A-B != {}
+ return !Rs.BitVector::test(*this);
+ }
+ bool intersects(const RegisterSet &Rs) const {
+ return BitVector::anyCommon(Rs);
+ }
+
+ private:
+ void ensure(unsigned Idx) {
+ if (size() <= Idx)
+ resize(std::max(Idx+1, 32U));
+ }
+ static inline unsigned v2x(unsigned v) {
+ return TargetRegisterInfo::virtReg2Index(v);
+ }
+ static inline unsigned x2v(unsigned x) {
+ return TargetRegisterInfo::index2VirtReg(x);
+ }
+ };
+
+
+ struct PrintRegSet {
+ PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
+ : RS(S), TRI(RI) {}
+ friend raw_ostream &operator<< (raw_ostream &OS,
+ const PrintRegSet &P);
+ private:
+ const RegisterSet &RS;
+ const TargetRegisterInfo *TRI;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
+ LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
+ OS << '{';
+ for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
+ OS << ' ' << PrintReg(R, P.TRI);
+ OS << " }";
+ return OS;
+ }
+}
+
+
+namespace {
+ class Transformation;
+
+ class HexagonBitSimplify : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) {
+ initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
+ }
+ virtual const char *getPassName() const {
+ return "Hexagon bit simplification";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
+ static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
+ static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
+ const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
+ static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W);
+ static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W);
+ static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
+ uint16_t W, uint64_t &U);
+ static bool replaceReg(unsigned OldR, unsigned NewR,
+ MachineRegisterInfo &MRI);
+ static bool getSubregMask(const BitTracker::RegisterRef &RR,
+ unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
+ static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
+ unsigned NewSR, MachineRegisterInfo &MRI);
+ static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
+ unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
+ static bool parseRegSequence(const MachineInstr &I,
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
+
+ static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+ uint16_t Begin);
+ static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
+ uint16_t Begin, const HexagonInstrInfo &HII);
+
+ static const TargetRegisterClass *getFinalVRegClass(
+ const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
+ static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
+ const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
+
+ private:
+ MachineDominatorTree *MDT;
+
+ bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
+ };
+
+ char HexagonBitSimplify::ID = 0;
+ typedef HexagonBitSimplify HBS;
+
+
+ // The purpose of this class is to provide a common facility to traverse
+ // the function top-down or bottom-up via the dominator tree, and keep
+ // track of the available registers.
+ class Transformation {
+ public:
+ bool TopDown;
+ Transformation(bool TD) : TopDown(TD) {}
+ virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
+ virtual ~Transformation() {}
+ };
+}
+
+INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
+ "Hexagon bit simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
+ "Hexagon bit simplification", false, false)
+
+
+bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
+ RegisterSet &AVs) {
+ MachineDomTreeNode *N = MDT->getNode(&B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ bool Changed = false;
+
+ if (T.TopDown)
+ Changed = T.processBlock(B, AVs);
+
+ RegisterSet Defs;
+ for (auto &I : B)
+ getInstrDefs(I, Defs);
+ RegisterSet NewAVs = AVs;
+ NewAVs.insert(Defs);
+
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ Changed |= visitBlock(*SB, T, NewAVs);
+ }
+ if (!T.TopDown)
+ Changed |= T.processBlock(B, AVs);
+
+ return Changed;
+}
+
+//
+// Utility functions:
+//
+void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
+ RegisterSet &Defs) {
+ for (auto &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Defs.insert(R);
+ }
+}
+
+void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
+ RegisterSet &Uses) {
+ for (auto &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.insert(R);
+ }
+}
+
+// Check if the W bits of RC1 starting at B1 are equal to the W bits of RC2
+// starting at B2.
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
+ uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
+ uint16_t W) {
+ for (uint16_t i = 0; i < W; ++i) {
+ // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
+ if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
+ return false;
+ // Same for RC2[i].
+ if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
+ return false;
+ if (RC1[B1+i] != RC2[B2+i])
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W) {
+ assert(B < RC.width() && B+W <= RC.width());
+ for (uint16_t i = B; i < B+W; ++i)
+ if (!RC[i].num())
+ return false;
+ return true;
+}
+
+
+bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W) {
+ assert(B < RC.width() && B+W <= RC.width());
+ for (uint16_t i = B; i < B+W; ++i)
+ if (!RC[i].is(0))
+ return false;
+ return true;
+}
+
+
+bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
+ uint16_t B, uint16_t W, uint64_t &U) {
+ assert(B < RC.width() && B+W <= RC.width());
+ int64_t T = 0;
+ for (uint16_t i = B+W; i > B; --i) {
+ const BitTracker::BitValue &BV = RC[i-1];
+ T <<= 1;
+ if (BV.is(1))
+ T |= 1;
+ else if (!BV.is(0))
+ return false;
+ }
+ U = T;
+ return true;
+}
+
+
+bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
+ MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ I->setReg(NewR);
+ }
+ return Begin != End;
+}
+
+
+bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
+ unsigned NewSR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ I->setReg(NewR);
+ I->setSubReg(NewSR);
+ }
+ return Begin != End;
+}
+
+
+bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
+ unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
+ !TargetRegisterInfo::isVirtualRegister(NewR))
+ return false;
+ auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
+ decltype(End) NextI;
+ for (auto I = Begin; I != End; I = NextI) {
+ NextI = std::next(I);
+ if (I->getSubReg() != OldSR)
+ continue;
+ I->setReg(NewR);
+ I->setSubReg(NewSR);
+ }
+ return Begin != End;
+}
+
+
+// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
+// of Sub in Reg, and set Width to the size of Sub in bits. Return true
+// if this succeeded, otherwise return false.
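+// For example, for a DoubleRegs register used as vreg1:subreg_hireg this
+// sets Begin=32 and Width=32; with no subregister it sets Begin=0 and
+// Width=64.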
+bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
+ unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
+ const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ assert(RR.Sub == 0);
+ Begin = 0;
+ Width = 32;
+ return true;
+ }
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ if (RR.Sub == 0) {
+ Begin = 0;
+ Width = 64;
+ return true;
+ }
+ assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg);
+ Width = 32;
+ Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32);
+ return true;
+ }
+ return false;
+}
+
+
+// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
+// subregister.
+bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) {
+ assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
+ unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
+ assert(Sub1 != Sub2);
+ if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) {
+ SL = I.getOperand(1);
+ SH = I.getOperand(3);
+ return true;
+ }
+ if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) {
+ SH = I.getOperand(1);
+ SL = I.getOperand(3);
+ return true;
+ }
+ return false;
+}
+
+
+// All stores (except 64-bit stores) take a 32-bit register as the source
+// of the value to be stored. If the instruction stores into a location
+// that is shorter than 32 bits, some bits of the source register are not
+// used. For each store instruction, calculate the set of used bits in
+// the source register, and set appropriate bits in Bits. Return true if
+// the bits are calculated, false otherwise.
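+// For example, a byte store uses only bits [Begin, Begin+8) of the source
+// register, and a high-halfword store uses bits [Begin+16, Begin+32).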
+bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+ uint16_t Begin) {
+ using namespace Hexagon;
+
+ switch (Opc) {
+ // Store byte
+ case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32
+ case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new
+ case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32
+ case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32
+ case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
+ case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
+ case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
+ case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
+ case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
+ case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
+ case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32
+ case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
+ case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
+ case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
+ case S4_storerb_ap: // memb(Re32=#U6)=Rt32
+ case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new
+ case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32
+ case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new
+ case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32
+ case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new
+ case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32
+ case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new
+ case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32
+ case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
+ case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32
+ case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new
+ case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
+ case S2_storerbgp: // memb(gp+#u16:0)=Rt32
+ case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new
+ case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32
+ case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32
+ case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32
+ case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32
+ case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new
+ case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new
+ case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new
+ case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new
+ Bits.set(Begin, Begin+8);
+ return true;
+
+ // Store low half
+ case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32
+ case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new
+ case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32
+ case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32
+ case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
+ case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
+ case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
+ case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
+ case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
+ case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
+ case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32
+ case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
+ case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
+ case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
+ case S4_storerh_ap: // memh(Re32=#U6)=Rt32
+ case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new
+ case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32
+ case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new
+ case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32
+ case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new
+ case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32
+ case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new
+ case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32
+ case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
+ case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32
+ case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new
+ case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
+ case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
+ case S2_storerhgp: // memh(gp+#u16:1)=Rt32
+ case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new
+ case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32
+ case S4_pstorerhf_abs: // if (!Pv4) memh(#u6)=Rt32
+ case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32
+ case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32
+ case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new
+ case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new
+ case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new
+ case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new
+ Bits.set(Begin, Begin+16);
+ return true;
+
+ // Store high half
+ case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32
+ case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
+ case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
+ case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
+ case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
+ case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
+ case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
+ case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32
+ case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32
+ case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32
+ case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32
+ case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
+ case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32
+ case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
+ case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32
+ case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32
+ case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32
+ case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32
+ case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+
+ return false;
+}
+
+
+// For an instruction with opcode Opc, calculate the set of bits that it
+// uses in a register in operand OpN. This only calculates the set of used
+// bits for cases where it does not depend on any operands (as is the case
+// in shifts, for example). For concrete instructions from a program, the
+// operand may be a subregister of a larger register, while Bits would
+// correspond to the larger register in its entirety. Because of that,
+// the parameter Begin can be used to indicate which bit of Bits should be
+// considered the LSB of the operand.
+bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
+ BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
+ using namespace Hexagon;
+
+ const MCInstrDesc &D = HII.get(Opc);
+ if (D.mayStore()) {
+ if (OpN == D.getNumOperands()-1)
+ return getUsedBitsInStore(Opc, Bits, Begin);
+ return false;
+ }
+
+ switch (Opc) {
+ // One register source. Used bits: R1[0-7].
+ case A2_sxtb:
+ case A2_zxtb:
+ case A4_cmpbeqi:
+ case A4_cmpbgti:
+ case A4_cmpbgtui:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+8);
+ return true;
+ }
+ break;
+
+ // One register source. Used bits: R1[0-15].
+ case A2_aslh:
+ case A2_sxth:
+ case A2_zxth:
+ case A4_cmpheqi:
+ case A4_cmphgti:
+ case A4_cmphgtui:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // One register source. Used bits: R1[16-31].
+ case A2_asrh:
+ if (OpN == 1) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-7], R2[0-7].
+ case A4_cmpbeq:
+ case A4_cmpbgt:
+ case A4_cmpbgtu:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+8);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-15], R2[0-15].
+ case A4_cmpheq:
+ case A4_cmphgt:
+ case A4_cmphgtu:
+ case A2_addh_h16_ll:
+ case A2_addh_h16_sat_ll:
+ case A2_addh_l16_ll:
+ case A2_addh_l16_sat_ll:
+ case A2_combine_ll:
+ case A2_subh_h16_ll:
+ case A2_subh_h16_sat_ll:
+ case A2_subh_l16_ll:
+ case A2_subh_l16_sat_ll:
+ case M2_mpy_acc_ll_s0:
+ case M2_mpy_acc_ll_s1:
+ case M2_mpy_acc_sat_ll_s0:
+ case M2_mpy_acc_sat_ll_s1:
+ case M2_mpy_ll_s0:
+ case M2_mpy_ll_s1:
+ case M2_mpy_nac_ll_s0:
+ case M2_mpy_nac_ll_s1:
+ case M2_mpy_nac_sat_ll_s0:
+ case M2_mpy_nac_sat_ll_s1:
+ case M2_mpy_rnd_ll_s0:
+ case M2_mpy_rnd_ll_s1:
+ case M2_mpy_sat_ll_s0:
+ case M2_mpy_sat_ll_s1:
+ case M2_mpy_sat_rnd_ll_s0:
+ case M2_mpy_sat_rnd_ll_s1:
+ case M2_mpyd_acc_ll_s0:
+ case M2_mpyd_acc_ll_s1:
+ case M2_mpyd_ll_s0:
+ case M2_mpyd_ll_s1:
+ case M2_mpyd_nac_ll_s0:
+ case M2_mpyd_nac_ll_s1:
+ case M2_mpyd_rnd_ll_s0:
+ case M2_mpyd_rnd_ll_s1:
+ case M2_mpyu_acc_ll_s0:
+ case M2_mpyu_acc_ll_s1:
+ case M2_mpyu_ll_s0:
+ case M2_mpyu_ll_s1:
+ case M2_mpyu_nac_ll_s0:
+ case M2_mpyu_nac_ll_s1:
+ case M2_mpyud_acc_ll_s0:
+ case M2_mpyud_acc_ll_s1:
+ case M2_mpyud_ll_s0:
+ case M2_mpyud_ll_s1:
+ case M2_mpyud_nac_ll_s0:
+ case M2_mpyud_nac_ll_s1:
+ if (OpN == 1 || OpN == 2) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // Two register sources. Used bits: R1[0-15], R2[16-31].
+ case A2_addh_h16_lh:
+ case A2_addh_h16_sat_lh:
+ case A2_combine_lh:
+ case A2_subh_h16_lh:
+ case A2_subh_h16_sat_lh:
+ case M2_mpy_acc_lh_s0:
+ case M2_mpy_acc_lh_s1:
+ case M2_mpy_acc_sat_lh_s0:
+ case M2_mpy_acc_sat_lh_s1:
+ case M2_mpy_lh_s0:
+ case M2_mpy_lh_s1:
+ case M2_mpy_nac_lh_s0:
+ case M2_mpy_nac_lh_s1:
+ case M2_mpy_nac_sat_lh_s0:
+ case M2_mpy_nac_sat_lh_s1:
+ case M2_mpy_rnd_lh_s0:
+ case M2_mpy_rnd_lh_s1:
+ case M2_mpy_sat_lh_s0:
+ case M2_mpy_sat_lh_s1:
+ case M2_mpy_sat_rnd_lh_s0:
+ case M2_mpy_sat_rnd_lh_s1:
+ case M2_mpyd_acc_lh_s0:
+ case M2_mpyd_acc_lh_s1:
+ case M2_mpyd_lh_s0:
+ case M2_mpyd_lh_s1:
+ case M2_mpyd_nac_lh_s0:
+ case M2_mpyd_nac_lh_s1:
+ case M2_mpyd_rnd_lh_s0:
+ case M2_mpyd_rnd_lh_s1:
+ case M2_mpyu_acc_lh_s0:
+ case M2_mpyu_acc_lh_s1:
+ case M2_mpyu_lh_s0:
+ case M2_mpyu_lh_s1:
+ case M2_mpyu_nac_lh_s0:
+ case M2_mpyu_nac_lh_s1:
+ case M2_mpyud_acc_lh_s0:
+ case M2_mpyud_acc_lh_s1:
+ case M2_mpyud_lh_s0:
+ case M2_mpyud_lh_s1:
+ case M2_mpyud_nac_lh_s0:
+ case M2_mpyud_nac_lh_s1:
+ // These four are actually LH.
+ case A2_addh_l16_hl:
+ case A2_addh_l16_sat_hl:
+ case A2_subh_l16_hl:
+ case A2_subh_l16_sat_hl:
+ if (OpN == 1) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ if (OpN == 2) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+
+ // Two register sources, used bits: R1[16-31], R2[0-15].
+ case A2_addh_h16_hl:
+ case A2_addh_h16_sat_hl:
+ case A2_combine_hl:
+ case A2_subh_h16_hl:
+ case A2_subh_h16_sat_hl:
+ case M2_mpy_acc_hl_s0:
+ case M2_mpy_acc_hl_s1:
+ case M2_mpy_acc_sat_hl_s0:
+ case M2_mpy_acc_sat_hl_s1:
+ case M2_mpy_hl_s0:
+ case M2_mpy_hl_s1:
+ case M2_mpy_nac_hl_s0:
+ case M2_mpy_nac_hl_s1:
+ case M2_mpy_nac_sat_hl_s0:
+ case M2_mpy_nac_sat_hl_s1:
+ case M2_mpy_rnd_hl_s0:
+ case M2_mpy_rnd_hl_s1:
+ case M2_mpy_sat_hl_s0:
+ case M2_mpy_sat_hl_s1:
+ case M2_mpy_sat_rnd_hl_s0:
+ case M2_mpy_sat_rnd_hl_s1:
+ case M2_mpyd_acc_hl_s0:
+ case M2_mpyd_acc_hl_s1:
+ case M2_mpyd_hl_s0:
+ case M2_mpyd_hl_s1:
+ case M2_mpyd_nac_hl_s0:
+ case M2_mpyd_nac_hl_s1:
+ case M2_mpyd_rnd_hl_s0:
+ case M2_mpyd_rnd_hl_s1:
+ case M2_mpyu_acc_hl_s0:
+ case M2_mpyu_acc_hl_s1:
+ case M2_mpyu_hl_s0:
+ case M2_mpyu_hl_s1:
+ case M2_mpyu_nac_hl_s0:
+ case M2_mpyu_nac_hl_s1:
+ case M2_mpyud_acc_hl_s0:
+ case M2_mpyud_acc_hl_s1:
+ case M2_mpyud_hl_s0:
+ case M2_mpyud_hl_s1:
+ case M2_mpyud_nac_hl_s0:
+ case M2_mpyud_nac_hl_s1:
+ if (OpN == 1) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ if (OpN == 2) {
+ Bits.set(Begin, Begin+16);
+ return true;
+ }
+ break;
+
+ // Two register sources, used bits: R1[16-31], R2[16-31].
+ case A2_addh_h16_hh:
+ case A2_addh_h16_sat_hh:
+ case A2_combine_hh:
+ case A2_subh_h16_hh:
+ case A2_subh_h16_sat_hh:
+ case M2_mpy_acc_hh_s0:
+ case M2_mpy_acc_hh_s1:
+ case M2_mpy_acc_sat_hh_s0:
+ case M2_mpy_acc_sat_hh_s1:
+ case M2_mpy_hh_s0:
+ case M2_mpy_hh_s1:
+ case M2_mpy_nac_hh_s0:
+ case M2_mpy_nac_hh_s1:
+ case M2_mpy_nac_sat_hh_s0:
+ case M2_mpy_nac_sat_hh_s1:
+ case M2_mpy_rnd_hh_s0:
+ case M2_mpy_rnd_hh_s1:
+ case M2_mpy_sat_hh_s0:
+ case M2_mpy_sat_hh_s1:
+ case M2_mpy_sat_rnd_hh_s0:
+ case M2_mpy_sat_rnd_hh_s1:
+ case M2_mpyd_acc_hh_s0:
+ case M2_mpyd_acc_hh_s1:
+ case M2_mpyd_hh_s0:
+ case M2_mpyd_hh_s1:
+ case M2_mpyd_nac_hh_s0:
+ case M2_mpyd_nac_hh_s1:
+ case M2_mpyd_rnd_hh_s0:
+ case M2_mpyd_rnd_hh_s1:
+ case M2_mpyu_acc_hh_s0:
+ case M2_mpyu_acc_hh_s1:
+ case M2_mpyu_hh_s0:
+ case M2_mpyu_hh_s1:
+ case M2_mpyu_nac_hh_s0:
+ case M2_mpyu_nac_hh_s1:
+ case M2_mpyud_acc_hh_s0:
+ case M2_mpyud_acc_hh_s1:
+ case M2_mpyud_hh_s0:
+ case M2_mpyud_hh_s1:
+ case M2_mpyud_nac_hh_s0:
+ case M2_mpyud_nac_hh_s1:
+ if (OpN == 1 || OpN == 2) {
+ Bits.set(Begin+16, Begin+32);
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
+
+// Calculate the register class that matches Reg:Sub. For example, if
+// vreg1 is a double register, then vreg1:subreg_hireg would match the
+// "int" register class.
+const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
+ const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return nullptr;
+ auto *RC = MRI.getRegClass(RR.Reg);
+ if (RR.Sub == 0)
+ return RC;
+
+ auto VerifySR = [] (unsigned Sub) -> void {
+ assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg);
+ };
+
+ switch (RC->getID()) {
+ case Hexagon::DoubleRegsRegClassID:
+ VerifySR(RR.Sub);
+ return &Hexagon::IntRegsRegClass;
+ }
+ return nullptr;
+}
+
+
+// Check if RD could be replaced with RS at any possible use of RD.
+// For example, a predicate register cannot be replaced with an integer
+// register, but a 64-bit register with a subregister can be replaced
+// with a 32-bit register.
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
+ const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
+ !TargetRegisterInfo::isVirtualRegister(RS.Reg))
+ return false;
+ // Return false if one (or both) classes are nullptr.
+ auto *DRC = getFinalVRegClass(RD, MRI);
+ if (!DRC)
+ return false;
+
+ return DRC == getFinalVRegClass(RS, MRI);
+}
+
+
+//
+// Dead code elimination
+//
+namespace {
+ class DeadCodeElimination {
+ public:
+ DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
+ : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
+ MDT(mdt), MRI(mf.getRegInfo()) {}
+
+ bool run() {
+ return runOnNode(MDT.getRootNode());
+ }
+
+ private:
+ bool isDead(unsigned R) const;
+ bool runOnNode(MachineDomTreeNode *N);
+
+ MachineFunction &MF;
+ const HexagonInstrInfo &HII;
+ MachineDominatorTree &MDT;
+ MachineRegisterInfo &MRI;
+ };
+}
+
+
+bool DeadCodeElimination::isDead(unsigned R) const {
+ for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+ MachineInstr *UseI = I->getParent();
+ if (UseI->isDebugValue())
+ continue;
+ if (UseI->isPHI()) {
+ assert(!UseI->getOperand(0).getSubReg());
+ unsigned DR = UseI->getOperand(0).getReg();
+ if (DR == R)
+ continue;
+ }
+ return false;
+ }
+ return true;
+}
+
+
+bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
+ bool Changed = false;
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+ Changed |= runOnNode(*I);
+
+ MachineBasicBlock *B = N->getBlock();
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ for (auto MI : Instrs) {
+ unsigned Opc = MI->getOpcode();
+ // Do not touch lifetime markers. This is why the target-independent DCE
+ // cannot be used.
+ if (Opc == TargetOpcode::LIFETIME_START ||
+ Opc == TargetOpcode::LIFETIME_END)
+ continue;
+ bool Store = false;
+ if (MI->isInlineAsm())
+ continue;
+ // Delete PHIs if possible.
+ if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
+ continue;
+
+ bool AllDead = true;
+ SmallVector<unsigned,2> Regs;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
+ AllDead = false;
+ break;
+ }
+ Regs.push_back(R);
+ }
+ if (!AllDead)
+ continue;
+
+ B->erase(MI);
+ for (unsigned i = 0, n = Regs.size(); i != n; ++i)
+ MRI.markUsesInDebugValueAsUndef(Regs[i]);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+//
+// Eliminate redundant instructions
+//
+// This transformation will identify instructions where the output register
+// is the same as one of its input registers. This only works on instructions
+// that define a single register (unlike post-increment loads, for example).
+// The equality check is actually more detailed: the code calculates which
+// bits of the output are used, and only compares these bits with the input
+// registers.
+// If the output matches an input, the instruction is replaced with COPY.
+// The copies will be removed by another transformation.
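+// For example, if vreg1 = A2_zxth vreg2 and the only user of vreg1 is a
+// halfword store (which reads just bits [0,16) of its source), the used bits
+// of vreg1 are identical to the low halfword of vreg2, so the zero-extension
+// can be rewritten as a COPY of vreg2.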
+namespace {
+ class RedundantInstrElimination : public Transformation {
+ public:
+ RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
+ unsigned &LostB, unsigned &LostE);
+ bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
+ unsigned &LostB, unsigned &LostE);
+ bool computeUsedBits(unsigned Reg, BitVector &Bits);
+ bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
+ uint16_t Begin);
+ bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+
+// Check if the instruction is a lossy shift left, where the input being
+// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
+// of bit indices that are lost.
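+// For example, a 32-bit shift left by 8 loses bits [24, 32).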
+bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
+ unsigned OpN, unsigned &LostB, unsigned &LostE) {
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+ unsigned ImN, RegN, Width;
+ switch (Opc) {
+ case S2_asl_i_p:
+ ImN = 2;
+ RegN = 1;
+ Width = 64;
+ break;
+ case S2_asl_i_p_acc:
+ case S2_asl_i_p_and:
+ case S2_asl_i_p_nac:
+ case S2_asl_i_p_or:
+ case S2_asl_i_p_xacc:
+ ImN = 3;
+ RegN = 2;
+ Width = 64;
+ break;
+ case S2_asl_i_r:
+ ImN = 2;
+ RegN = 1;
+ Width = 32;
+ break;
+ case S2_addasl_rrri:
+ case S4_andi_asl_ri:
+ case S4_ori_asl_ri:
+ case S4_addi_asl_ri:
+ case S4_subi_asl_ri:
+ case S2_asl_i_r_acc:
+ case S2_asl_i_r_and:
+ case S2_asl_i_r_nac:
+ case S2_asl_i_r_or:
+ case S2_asl_i_r_sat:
+ case S2_asl_i_r_xacc:
+ ImN = 3;
+ RegN = 2;
+ Width = 32;
+ break;
+ default:
+ return false;
+ }
+
+ if (RegN != OpN)
+ return false;
+
+ assert(MI.getOperand(ImN).isImm());
+ unsigned S = MI.getOperand(ImN).getImm();
+ if (S == 0)
+ return false;
+ LostB = Width-S;
+ LostE = Width;
+ return true;
+}
+
+
+// Check if the instruction is a lossy shift right, where the input being
+// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
+// of bit indices that are lost.
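+// For example, a shift right by 8 loses bits [0, 8).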
+bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
+ unsigned OpN, unsigned &LostB, unsigned &LostE) {
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+ unsigned ImN, RegN;
+ switch (Opc) {
+ case S2_asr_i_p:
+ case S2_lsr_i_p:
+ ImN = 2;
+ RegN = 1;
+ break;
+ case S2_asr_i_p_acc:
+ case S2_asr_i_p_and:
+ case S2_asr_i_p_nac:
+ case S2_asr_i_p_or:
+ case S2_lsr_i_p_acc:
+ case S2_lsr_i_p_and:
+ case S2_lsr_i_p_nac:
+ case S2_lsr_i_p_or:
+ case S2_lsr_i_p_xacc:
+ ImN = 3;
+ RegN = 2;
+ break;
+ case S2_asr_i_r:
+ case S2_lsr_i_r:
+ ImN = 2;
+ RegN = 1;
+ break;
+ case S4_andi_lsr_ri:
+ case S4_ori_lsr_ri:
+ case S4_addi_lsr_ri:
+ case S4_subi_lsr_ri:
+ case S2_asr_i_r_acc:
+ case S2_asr_i_r_and:
+ case S2_asr_i_r_nac:
+ case S2_asr_i_r_or:
+ case S2_lsr_i_r_acc:
+ case S2_lsr_i_r_and:
+ case S2_lsr_i_r_nac:
+ case S2_lsr_i_r_or:
+ case S2_lsr_i_r_xacc:
+ ImN = 3;
+ RegN = 2;
+ break;
+
+ default:
+ return false;
+ }
+
+ if (RegN != OpN)
+ return false;
+
+ assert(MI.getOperand(ImN).isImm());
+ unsigned S = MI.getOperand(ImN).getImm();
+ LostB = 0;
+ LostE = S;
+ return true;
+}
+
+
+// Calculate the bit vector that corresponds to the used bits of register Reg.
+// The vector Bits has the same size as Reg has bits. If the calculation
+// fails (i.e. the used bits are unknown), it returns false. Otherwise, it
+// returns true and sets the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+ BitVector Used(Bits.size());
+ RegisterSet Visited;
+ std::vector<unsigned> Pending;
+ Pending.push_back(Reg);
+
+ for (unsigned i = 0; i < Pending.size(); ++i) {
+ unsigned R = Pending[i];
+ if (Visited.has(R))
+ continue;
+ Visited.insert(R);
+ for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+ BitTracker::RegisterRef UR = *I;
+ unsigned B, W;
+ if (!HBS::getSubregMask(UR, B, W, MRI))
+ return false;
+ MachineInstr &UseI = *I->getParent();
+ if (UseI.isPHI() || UseI.isCopy()) {
+ unsigned DefR = UseI.getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ return false;
+ Pending.push_back(DefR);
+ } else {
+ if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+ return false;
+ }
+ }
+ }
+ Bits |= Used;
+ return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+// Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+// argument Begin indicates where in Bits the least-significant bit of the
+// register used in operand OpN is located. For example, in the instruction:
+//   vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+// In this case Begin=32, since Bits[32] would be the least-significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+ unsigned OpN, BitVector &Bits, uint16_t Begin) {
+ unsigned Opc = MI.getOpcode();
+ BitVector T(Bits.size());
+ bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+ // Even if we don't have bits yet, we could still provide some information
+ // if the instruction is a lossy shift: the lost bits will be marked as
+ // not used.
+ unsigned LB, LE;
+ if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+ assert(MI.getOperand(OpN).isReg());
+ BitTracker::RegisterRef RR = MI.getOperand(OpN);
+ const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+ uint16_t Width = RC->getSize()*8;
+
+ if (!GotBits)
+ T.set(Begin, Begin+Width);
+ assert(LB <= LE && LB < Width && LE <= Width);
+ T.reset(Begin+LB, Begin+LE);
+ GotBits = true;
+ }
+ if (GotBits)
+ Bits |= T;
+ return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical.
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
+ BitTracker::RegisterRef RS) {
+ const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+
+ unsigned DB, DW;
+ if (!HBS::getSubregMask(RD, DB, DW, MRI))
+ return false;
+ unsigned SB, SW;
+ if (!HBS::getSubregMask(RS, SB, SW, MRI))
+ return false;
+ if (SW != DW)
+ return false;
+
+ BitVector Used(DC.width());
+ if (!computeUsedBits(RD.Reg, Used))
+ return false;
+
+ for (unsigned i = 0; i != DW; ++i)
+ if (Used[i+DB] && DC[DB+i] != SC[SB+i])
+ return false;
+ return true;
+}
+
+
+bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
+ const RegisterSet&) {
+ bool Changed = false;
+
+ for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
+ NextI = std::next(I);
+ MachineInstr *MI = &*I;
+
+ if (MI->getOpcode() == TargetOpcode::COPY)
+ continue;
+ if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
+ continue;
+ unsigned NumD = MI->getDesc().getNumDefs();
+ if (NumD != 1)
+ continue;
+
+ BitTracker::RegisterRef RD = MI->getOperand(0);
+ if (!BT.has(RD.Reg))
+ continue;
+ const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
+
+ // Find a source operand that is equal to the result.
+ for (auto &Op : MI->uses()) {
+ if (!Op.isReg())
+ continue;
+ BitTracker::RegisterRef RS = Op;
+ if (!BT.has(RS.Reg))
+ continue;
+ if (!HBS::isTransparentCopy(RD, RS, MRI))
+ continue;
+
+ unsigned BN, BW;
+ if (!HBS::getSubregMask(RS, BN, BW, MRI))
+ continue;
+
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
+ continue;
+
+ // If found, replace the instruction with a COPY.
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(RS.Reg, 0, RS.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), SC);
+ Changed = true;
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+
+//
+// Const generation
+//
+// Recognize instructions that produce constant values known at compile-time.
+// Replace them with register definitions that load these constants directly.
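+// For example, if the bit tracker proves that vreg1 always holds the value
+// 7, a new 32-bit register is defined with "A2_tfrsi #7" and all uses of
+// vreg1 are redirected to it.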
+namespace {
+ class ConstGeneration : public Transformation {
+ public:
+ ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool isTfrConst(const MachineInstr *MI) const;
+ bool isConst(unsigned R, int64_t &V) const;
+ unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+bool ConstGeneration::isConst(unsigned R, int64_t &C) const {
+ if (!BT.has(R))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(R);
+ int64_t T = 0;
+ for (unsigned i = RC.width(); i > 0; --i) {
+ const BitTracker::BitValue &V = RC[i-1];
+ T <<= 1;
+ if (V.is(1))
+ T |= 1;
+ else if (!V.is(0))
+ return false;
+ }
+ C = T;
+ return true;
+}
+
+
+bool ConstGeneration::isTfrConst(const MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_tfrsi:
+ case Hexagon::A2_tfrpi:
+ case Hexagon::TFR_PdTrue:
+ case Hexagon::TFR_PdFalse:
+ case Hexagon::CONST32_Int_Real:
+ case Hexagon::CONST64_Int_Real:
+ return true;
+ }
+ return false;
+}
+
+
+// Generate a transfer-immediate instruction that is appropriate for the
+// register class and the actual value being transferred.
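+// For a 64-bit register, a constant that fits in 8 signed bits uses
+// A2_tfrpi; otherwise, if either 32-bit half fits in 8 bits, a combine of
+// the two halves is generated, with CONST64_Int_Real as the fallback.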
+unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
+ MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
+ unsigned Reg = MRI.createVirtualRegister(RC);
+ if (RC == &Hexagon::IntRegsRegClass) {
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
+ .addImm(int32_t(C));
+ return Reg;
+ }
+
+ if (RC == &Hexagon::DoubleRegsRegClass) {
+ if (isInt<8>(C)) {
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
+ .addImm(C);
+ return Reg;
+ }
+
+ unsigned Lo = Lo_32(C), Hi = Hi_32(C);
+ if (isInt<8>(Lo) || isInt<8>(Hi)) {
+ unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
+ : Hexagon::A4_combineii;
+ BuildMI(B, At, DL, HII.get(Opc), Reg)
+ .addImm(int32_t(Hi))
+ .addImm(int32_t(Lo));
+ return Reg;
+ }
+
+ BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg)
+ .addImm(C);
+ return Reg;
+ }
+
+ if (RC == &Hexagon::PredRegsRegClass) {
+ unsigned Opc;
+ if (C == 0)
+ Opc = Hexagon::TFR_PdFalse;
+ else if ((C & 0xFF) == 0xFF)
+ Opc = Hexagon::TFR_PdTrue;
+ else
+ return 0;
+ BuildMI(B, At, DL, HII.get(Opc), Reg);
+ return Reg;
+ }
+
+ return 0;
+}
+
+
+bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ bool Changed = false;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(); I != E; ++I) {
+ if (isTfrConst(I))
+ continue;
+ Defs.clear();
+ HBS::getInstrDefs(*I, Defs);
+ if (Defs.count() != 1)
+ continue;
+ unsigned DR = Defs.find_first();
+ if (!TargetRegisterInfo::isVirtualRegister(DR))
+ continue;
+ int64_t C;
+ if (isConst(DR, C)) {
+ DebugLoc DL = I->getDebugLoc();
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+ unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
+ if (ImmReg) {
+ HBS::replaceReg(DR, ImmReg, MRI);
+ BT.put(ImmReg, BT.lookup(DR));
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+
+//
+// Copy generation
+//
+// Identify pairs of available registers which hold identical values.
+// In such cases, only one of them needs to be calculated, the other one
+// will be defined as a copy of the first.
+//
+// Copy propagation
+//
+// Eliminate register copies RD = RS by replacing the uses of RD with
+// uses of RS.
+namespace {
+ class CopyGeneration : public Transformation {
+ public:
+ CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ bool findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+
+ class CopyPropagation : public Transformation {
+ public:
+ CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+ : Transformation(false), MRI(mri) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ static bool isCopyReg(unsigned Opc);
+ private:
+ bool propagateRegCopy(MachineInstr &MI);
+
+ MachineRegisterInfo &MRI;
+ };
+
+}
+
+
+/// Check if there is a register in AVs that is identical to Inp. If so,
+/// set Out to the found register. The output may be a pair Reg:Sub.
+bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
+ BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
+ if (!BT.has(Inp.Reg))
+ return false;
+ const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+ unsigned B, W;
+ if (!HBS::getSubregMask(Inp, B, W, MRI))
+ return false;
+
+ for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
+ if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+ continue;
+ const BitTracker::RegisterCell &RC = BT.lookup(R);
+ unsigned RW = RC.width();
+ if (W == RW) {
+ if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+ continue;
+ if (!HBS::isEqual(InpRC, B, RC, 0, W))
+ continue;
+ Out.Reg = R;
+ Out.Sub = 0;
+ return true;
+ }
+    // Check if there is a super-register whose part (with a subregister)
+ // is equal to the input.
+ // Only do double registers for now.
+ if (W*2 != RW)
+ continue;
+ if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
+ continue;
+
+ if (HBS::isEqual(InpRC, B, RC, 0, W))
+ Out.Sub = Hexagon::subreg_loreg;
+ else if (HBS::isEqual(InpRC, B, RC, W, W))
+ Out.Sub = Hexagon::subreg_hireg;
+ else
+ continue;
+ Out.Reg = R;
+ return true;
+ }
+ return false;
+}
+
+
+bool CopyGeneration::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ RegisterSet AVB(AVs);
+ bool Changed = false;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
+ ++I, AVB.insert(Defs)) {
+ NextI = std::next(I);
+ Defs.clear();
+ HBS::getInstrDefs(*I, Defs);
+
+ unsigned Opc = I->getOpcode();
+ if (CopyPropagation::isCopyReg(Opc))
+ continue;
+
+ for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
+ BitTracker::RegisterRef MR;
+ if (!findMatch(R, MR, AVB))
+ continue;
+ DebugLoc DL = I->getDebugLoc();
+ auto *FRC = HBS::getFinalVRegClass(MR, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(MR.Reg, 0, MR.Sub);
+ BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+ }
+ }
+
+ return Changed;
+}
+
+
+bool CopyPropagation::isCopyReg(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case TargetOpcode::REG_SEQUENCE:
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrp:
+ case Hexagon::A2_combinew:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineri:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+
+bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
+ bool Changed = false;
+ unsigned Opc = MI.getOpcode();
+ BitTracker::RegisterRef RD = MI.getOperand(0);
+ assert(MI.getOperand(0).getSubReg() == 0);
+
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrp: {
+ BitTracker::RegisterRef RS = MI.getOperand(1);
+ if (!HBS::isTransparentCopy(RD, RS, MRI))
+ break;
+ if (RS.Sub != 0)
+ Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
+ else
+ Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
+ break;
+ }
+ case TargetOpcode::REG_SEQUENCE: {
+ BitTracker::RegisterRef SL, SH;
+ if (HBS::parseRegSequence(MI, SL, SH)) {
+ Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+ SL.Reg, SL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+ SH.Reg, SH.Sub, MRI);
+ }
+ break;
+ }
+ case Hexagon::A2_combinew: {
+ BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
+ Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+ RL.Reg, RL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+ RH.Reg, RH.Sub, MRI);
+ break;
+ }
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineri: {
+ unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
+ unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
+ : Hexagon::subreg_hireg;
+ BitTracker::RegisterRef RS = MI.getOperand(SrcX);
+ Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
+ break;
+ }
+ }
+ return Changed;
+}
+
+
+bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ std::vector<MachineInstr*> Instrs;
+ for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
+ Instrs.push_back(&*I);
+
+ bool Changed = false;
+ for (auto I : Instrs) {
+ unsigned Opc = I->getOpcode();
+ if (!CopyPropagation::isCopyReg(Opc))
+ continue;
+ Changed |= propagateRegCopy(*I);
+ }
+
+ return Changed;
+}
+
+
+//
+// Bit simplification
+//
+// Recognize patterns that can be simplified and replace them with the
+// simpler forms.
+// This is by no means complete.
+namespace {
+ class BitSimplification : public Transformation {
+ public:
+ BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
+ MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ private:
+ struct RegHalf : public BitTracker::RegisterRef {
+ bool Low; // Low/High halfword.
+ };
+
+ bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
+ unsigned B, RegHalf &RH);
+
+ bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
+ BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
+ unsigned getCombineOpcode(bool HLow, bool LLow);
+
+ bool genStoreUpperHalf(MachineInstr *MI);
+ bool genStoreImmediate(MachineInstr *MI);
+ bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+ bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC);
+
+ const HexagonInstrInfo &HII;
+ MachineRegisterInfo &MRI;
+ BitTracker &BT;
+ };
+}
+
+
+// Check if the bits [B..B+16) in register cell RC form a valid halfword,
+// i.e. [0..16), [16..32), etc. of some register. If so, return true and
+// set the information about the found register in RH.
+bool BitSimplification::matchHalf(unsigned SelfR,
+ const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
+ // XXX This could be searching in the set of available registers, in case
+ // the match is not exact.
+
+ // Match 16-bit chunks, where the RC[B..B+15] references exactly one
+ // register and all the bits B..B+15 match between RC and the register.
+ // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
+ // and RC = { [0]:0 [1-15]:v1[1-15]... }.
+ bool Low = false;
+ unsigned I = B;
+ while (I < B+16 && RC[I].num())
+ I++;
+ if (I == B+16)
+ return false;
+
+ unsigned Reg = RC[I].RefI.Reg;
+ unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B.
+ if (P < I-B)
+ return false;
+ unsigned Pos = P - (I-B);
+
+ if (Reg == 0 || Reg == SelfR) // Don't match "self".
+ return false;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return false;
+ if (!BT.has(Reg))
+ return false;
+
+ const BitTracker::RegisterCell &SC = BT.lookup(Reg);
+ if (Pos+16 > SC.width())
+ return false;
+
+ for (unsigned i = 0; i < 16; ++i) {
+ const BitTracker::BitValue &RV = RC[i+B];
+ if (RV.Type == BitTracker::BitValue::Ref) {
+ if (RV.RefI.Reg != Reg)
+ return false;
+ if (RV.RefI.Pos != i+Pos)
+ return false;
+ continue;
+ }
+ if (RC[i+B] != SC[i+Pos])
+ return false;
+ }
+
+ unsigned Sub = 0;
+ switch (Pos) {
+ case 0:
+ Sub = Hexagon::subreg_loreg;
+ Low = true;
+ break;
+ case 16:
+ Sub = Hexagon::subreg_loreg;
+ Low = false;
+ break;
+ case 32:
+ Sub = Hexagon::subreg_hireg;
+ Low = true;
+ break;
+ case 48:
+ Sub = Hexagon::subreg_hireg;
+ Low = false;
+ break;
+ default:
+ return false;
+ }
+
+ RH.Reg = Reg;
+ RH.Sub = Sub;
+ RH.Low = Low;
+ // If the subregister is not valid with the register, set it to 0.
+ if (!HBS::getFinalVRegClass(RH, MRI))
+ RH.Sub = 0;
+
+ return true;
+}
+
+
+// Check if RC matches the pattern of a S2_packhl. If so, return true and
+// set the inputs Rs and Rt.
+bool BitSimplification::matchPackhl(unsigned SelfR,
+ const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
+ BitTracker::RegisterRef &Rt) {
+ RegHalf L1, H1, L2, H2;
+
+ if (!matchHalf(SelfR, RC, 0, L2) || !matchHalf(SelfR, RC, 16, L1))
+ return false;
+ if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
+ return false;
+
+ // Rs = H1.L1, Rt = H2.L2
+ if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
+ return false;
+ if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
+ return false;
+
+ Rs = H1;
+ Rt = H2;
+ return true;
+}
+
+
+unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
+ return HLow ? LLow ? Hexagon::A2_combine_ll
+ : Hexagon::A2_combine_lh
+ : LLow ? Hexagon::A2_combine_hl
+ : Hexagon::A2_combine_hh;
+}
+
+
+// If MI stores the upper halfword of a register (potentially obtained via
+// shifts or extracts), replace it with a storerf instruction. This could
+// cause the "extraction" code to become dead.
+bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::S2_storerh_io)
+ return false;
+
+ MachineOperand &ValOp = MI->getOperand(2);
+ BitTracker::RegisterRef RS = ValOp;
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ RegHalf H;
+ if (!matchHalf(0, RC, 0, H))
+ return false;
+ if (H.Low)
+ return false;
+ MI->setDesc(HII.get(Hexagon::S2_storerf_io));
+ ValOp.setReg(H.Reg);
+ ValOp.setSubReg(H.Sub);
+ return true;
+}
+
+
+// If MI stores a value known at compile-time, and the value is within a range
+// that avoids using constant-extenders, replace it with a store-immediate.
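+// For illustration only (registers, offsets and values are hypothetical):
+// if vreg1 is known to hold the constant 5, then
+//   S2_storerb_io vreg0, 1, vreg1
+// can be rewritten as the store-immediate
+//   S4_storeirb_io vreg0, 1, 5
+// provided the offset fits the u6:0 field and the value fits in 8 bits.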
+bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ unsigned Align = 0;
+ switch (Opc) {
+ case Hexagon::S2_storeri_io:
+ Align++;
+ case Hexagon::S2_storerh_io:
+ Align++;
+ case Hexagon::S2_storerb_io:
+ break;
+ default:
+ return false;
+ }
+
+ // Avoid stores to frame-indices (due to an unknown offset).
+ if (!MI->getOperand(0).isReg())
+ return false;
+ MachineOperand &OffOp = MI->getOperand(1);
+ if (!OffOp.isImm())
+ return false;
+
+ int64_t Off = OffOp.getImm();
+ // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
+ if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
+ return false;
+ // Source register:
+ BitTracker::RegisterRef RS = MI->getOperand(2);
+ if (!BT.has(RS.Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+ uint64_t U;
+ if (!HBS::getConst(RC, 0, RC.width(), U))
+ return false;
+
+ // Only consider 8-bit values to avoid constant-extenders.
+ int V;
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ V = int8_t(U);
+ break;
+ case Hexagon::S2_storerh_io:
+ V = int16_t(U);
+ break;
+ case Hexagon::S2_storeri_io:
+ V = int32_t(U);
+ break;
+ }
+ if (!isInt<8>(V))
+ return false;
+
+ MI->RemoveOperand(2);
+ switch (Opc) {
+ case Hexagon::S2_storerb_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
+ break;
+ case Hexagon::S2_storerh_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
+ break;
+ case Hexagon::S2_storeri_io:
+ MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
+ break;
+ }
+ MI->addOperand(MachineOperand::CreateImm(V));
+ return true;
+}
+
+
+// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
+// last instruction in a sequence that results in something equivalent to
+// the pack-halfwords. The intent is to cause the entire sequence to become
+// dead.
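+// For illustration only (register numbers are hypothetical): if the tracked
+// bits of the 64-bit result are, from the high halfword down to the low one,
+//   { vreg1.H, vreg2.H, vreg1.L, vreg2.L }
+// then the defining instruction can be replaced with
+//   vregN = S2_packhl vreg1, vreg2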
+bool BitSimplification::genPackhl(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == Hexagon::S2_packhl)
+ return false;
+ BitTracker::RegisterRef Rs, Rt;
+ if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+ .addReg(Rs.Reg, 0, Rs.Sub)
+ .addReg(Rt.Reg, 0, Rt.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI produces a halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
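+// For illustration only (register numbers are hypothetical): if the result
+// is known to equal the low halfword of vreg0 zero-extended to 32 bits, the
+// defining instruction becomes
+//   vregN = A2_zxth vreg0
+// and if it equals the (zero-extended) high halfword of vreg0, it becomes
+//   vregN = S2_extractu vreg0, 16, 16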
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L;
+ // Check for halfword in low 16 bits, zeros elsewhere.
+ if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Prefer zxth, since zxth can go in any slot, while extractu only in
+ // slots 2 and 3.
+ unsigned NewR = 0;
+ if (L.Low && Opc != Hexagon::A2_zxth) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+ .addReg(L.Reg, 0, L.Sub);
+ } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+ .addReg(L.Reg, 0, L.Sub)
+ .addImm(16)
+ .addImm(16);
+ }
+ if (NewR == 0)
+ return false;
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
+// combine.
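+// For illustration only (register numbers are hypothetical): if the low
+// halfword of the result equals vreg1.L and the high halfword equals
+// vreg2.H, the defining instruction becomes
+//   vregN = A2_combine_hl vreg2, vreg1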
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ RegHalf L, H;
+ // Check for combine h/l
+ if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+ return false;
+ // Do nothing if this is just a reg copy.
+ if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ unsigned COpc = getCombineOpcode(H.Low, L.Low);
+ if (COpc == Opc)
+ return false;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(COpc), NewR)
+ .addReg(H.Reg, 0, H.Sub)
+ .addReg(L.Reg, 0, L.Sub);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+}
+
+
+// If MI resets high bits of a register and keeps the lower ones, replace it
+// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
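+// For illustration only (registers and widths are hypothetical): if only the
+// low 8 bits of the result can be nonzero and they equal the low bits of
+// vreg0, the defining instruction becomes
+//   vregN = A2_zxtb vreg0
+// and if only the low 5 bits can be nonzero, it becomes
+//   vregN = A2_andir vreg0, 31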
+bool BitSimplification::genExtractLow(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ case Hexagon::S2_extractu:
+ return false;
+ }
+ if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
+ int32_t Imm = MI->getOperand(2).getImm();
+ if (isInt<10>(Imm))
+ return false;
+ }
+
+ if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
+ return false;
+ unsigned W = RC.width();
+ while (W > 0 && RC[W-1].is(0))
+ W--;
+ if (W == 0 || W == RC.width())
+ return false;
+ unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb
+ : (W == 16) ? Hexagon::A2_zxth
+ : (W < 10) ? Hexagon::A2_andir
+ : Hexagon::S2_extractu;
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ for (auto &Op : MI->uses()) {
+ if (!Op.isReg())
+ continue;
+ BitTracker::RegisterRef RS = Op;
+ if (!BT.has(RS.Reg))
+ continue;
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ unsigned BN, BW;
+ if (!HBS::getSubregMask(RS, BN, BW, MRI))
+ continue;
+ if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
+ continue;
+
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR)
+ .addReg(RS.Reg, 0, RS.Sub);
+ if (NewOpc == Hexagon::A2_andir)
+ MIB.addImm((1 << W) - 1);
+ else if (NewOpc == Hexagon::S2_extractu)
+ MIB.addImm(W).addImm(0);
+ HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ return true;
+ }
+ return false;
+}
+
+
+// Check for tstbit simplification opportunity, where the bit being checked
+// can be tracked back to another register. For example:
+// vreg2 = S2_lsr_i_r vreg1, 5
+// vreg3 = S2_tstbit_i vreg2, 0
+// =>
+// vreg3 = S2_tstbit_i vreg1, 5
+bool BitSimplification::simplifyTstbit(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc != Hexagon::S2_tstbit_i)
+ return false;
+
+ unsigned BN = MI->getOperand(2).getImm();
+ BitTracker::RegisterRef RS = MI->getOperand(1);
+ unsigned F, W;
+ DebugLoc DL = MI->getDebugLoc();
+ if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
+ return false;
+ MachineBasicBlock &B = *MI->getParent();
+
+ const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
+ const BitTracker::BitValue &V = SC[F+BN];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
+ const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
+ // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
+ // a double register, need to use a subregister and adjust bit
+ // number.
+ unsigned P = UINT_MAX;
+ BitTracker::RegisterRef RR(V.RefI.Reg, 0);
+ if (TC == &Hexagon::DoubleRegsRegClass) {
+ P = V.RefI.Pos;
+ RR.Sub = Hexagon::subreg_loreg;
+ if (P >= 32) {
+ P -= 32;
+ RR.Sub = Hexagon::subreg_hireg;
+ }
+ } else if (TC == &Hexagon::IntRegsRegClass) {
+ P = V.RefI.Pos;
+ }
+ if (P != UINT_MAX) {
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
+ .addReg(RR.Reg, 0, RR.Sub)
+ .addImm(P);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BT.put(NewR, RC);
+ return true;
+ }
+ } else if (V.is(0) || V.is(1)) {
+ unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
+ BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ return true;
+ }
+
+ return false;
+}
+
+
+bool BitSimplification::processBlock(MachineBasicBlock &B,
+ const RegisterSet &AVs) {
+ bool Changed = false;
+ RegisterSet AVB = AVs;
+ RegisterSet Defs;
+
+ for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
+ MachineInstr *MI = &*I;
+ Defs.clear();
+ HBS::getInstrDefs(*MI, Defs);
+
+ unsigned Opc = MI->getOpcode();
+ if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
+ continue;
+
+ if (MI->mayStore()) {
+ bool T = genStoreUpperHalf(MI);
+ T = T || genStoreImmediate(MI);
+ Changed |= T;
+ continue;
+ }
+
+ if (Defs.count() != 1)
+ continue;
+ const MachineOperand &Op0 = MI->getOperand(0);
+ if (!Op0.isReg() || !Op0.isDef())
+ continue;
+ BitTracker::RegisterRef RD = Op0;
+ if (!BT.has(RD.Reg))
+ continue;
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
+
+ if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
+ bool T = genPackhl(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::IntRegsRegClassID) {
+ bool T = genExtractHalf(MI, RD, RC);
+ T = T || genCombineHalf(MI, RD, RC);
+ T = T || genExtractLow(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+
+ if (FRC->getID() == Hexagon::PredRegsRegClassID) {
+ bool T = simplifyTstbit(MI, RD, RC);
+ Changed |= T;
+ continue;
+ }
+ }
+ return Changed;
+}
+
+
+bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HRI = *HST.getRegisterInfo();
+ auto &HII = *HST.getInstrInfo();
+
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool Changed;
+
+ Changed = DeadCodeElimination(MF, *MDT).run();
+
+ const HexagonEvaluator HE(HRI, MRI, HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+
+ MachineBasicBlock &Entry = MF.front();
+
+ RegisterSet AIG; // Available registers for IG.
+ ConstGeneration ImmG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, ImmG, AIG);
+
+ RegisterSet ARE; // Available registers for RIE.
+ RedundantInstrElimination RIE(BT, HII, MRI);
+ Changed |= visitBlock(Entry, RIE, ARE);
+
+ RegisterSet ACG; // Available registers for CG.
+ CopyGeneration CopyG(BT, HII, MRI);
+ Changed |= visitBlock(Entry, CopyG, ACG);
+
+ RegisterSet ACP; // Available registers for CP.
+ CopyPropagation CopyP(HRI, MRI);
+ Changed |= visitBlock(Entry, CopyP, ACP);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+ BT.run();
+ RegisterSet ABS; // Available registers for BS.
+ BitSimplification BitS(BT, HII, MRI);
+ Changed |= visitBlock(Entry, BitS, ABS);
+
+ Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+ if (Changed) {
+ for (auto &B : MF)
+ for (auto &I : B)
+ I.clearKillInfo();
+ DeadCodeElimination(MF, *MDT).run();
+ }
+ return Changed;
+}
+
+
+// Recognize loops where the code at the end of the loop matches the code
+// before the entry of the loop, and the matching code is such that it can
+// be simplified. This pass relies on the bit simplification above and only
+// prepares code in a way that can be handled by the bit simplification.
+//
+// This is the motivating testcase (and explanation):
+//
+// {
+// loop0(.LBB0_2, r1) // %for.body.preheader
+// r5:4 = memd(r0++#8)
+// }
+// {
+// r3 = lsr(r4, #16)
+// r7:6 = combine(r5, r5)
+// }
+// {
+// r3 = insert(r5, #16, #16)
+// r7:6 = vlsrw(r7:6, #16)
+// }
+// .LBB0_2:
+// {
+// memh(r2+#4) = r5
+// memh(r2+#6) = r6 # R6 is really R5.H
+// }
+// {
+// r2 = add(r2, #8)
+// memh(r2+#0) = r4
+// memh(r2+#2) = r3 # R3 is really R4.H
+// }
+// {
+// r5:4 = memd(r0++#8)
+// }
+// { # "Shuffling" code that sets up R3 and R6
+// r3 = lsr(r4, #16) # so that their halves can be stored in the
+// r7:6 = combine(r5, r5) # next iteration. This could be folded into
+// } # the stores if the code was at the beginning
+// { # of the loop iteration. Since the same code
+// r3 = insert(r5, #16, #16) # precedes the loop, it can actually be moved
+// r7:6 = vlsrw(r7:6, #16) # there.
+// }:endloop0
+//
+//
+// The outcome:
+//
+// {
+// loop0(.LBB0_2, r1)
+// r5:4 = memd(r0++#8)
+// }
+// .LBB0_2:
+// {
+// memh(r2+#4) = r5
+// memh(r2+#6) = r5.h
+// }
+// {
+// r2 = add(r2, #8)
+// memh(r2+#0) = r4
+// memh(r2+#2) = r4.h
+// }
+// {
+// r5:4 = memd(r0++#8)
+// }:endloop0
+
+namespace llvm {
+ FunctionPass *createHexagonLoopRescheduling();
+ void initializeHexagonLoopReschedulingPass(PassRegistry&);
+}
+
+namespace {
+ class HexagonLoopRescheduling : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonLoopRescheduling() : MachineFunctionPass(ID),
+ HII(0), HRI(0), MRI(0), BTP(0) {
+ initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+ MachineRegisterInfo *MRI;
+ BitTracker *BTP;
+
+ struct LoopCand {
+ LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
+ MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
+ MachineBasicBlock *LB, *PB, *EB;
+ };
+ typedef std::vector<MachineInstr*> InstrList;
+ struct InstrGroup {
+ BitTracker::RegisterRef Inp, Out;
+ InstrList Ins;
+ };
+ struct PhiInfo {
+ PhiInfo(MachineInstr &P, MachineBasicBlock &B);
+ unsigned DefR;
+ BitTracker::RegisterRef LR, PR;
+ MachineBasicBlock *LB, *PB;
+ };
+
+ static unsigned getDefReg(const MachineInstr *MI);
+ bool isConst(unsigned Reg) const;
+ bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
+ bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
+ bool isShuffleOf(unsigned OutR, unsigned InpR) const;
+ bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
+ unsigned &InpR2) const;
+ void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
+ MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
+ bool processLoop(LoopCand &C);
+ };
+}
+
+char HexagonLoopRescheduling::ID = 0;
+
+INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
+ "Hexagon Loop Rescheduling", false, false)
+
+
+HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
+ MachineBasicBlock &B) {
+ DefR = HexagonLoopRescheduling::getDefReg(&P);
+ LB = &B;
+ PB = nullptr;
+ for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
+ const MachineOperand &OpB = P.getOperand(i+1);
+ if (OpB.getMBB() == &B) {
+ LR = P.getOperand(i);
+ continue;
+ }
+ PB = OpB.getMBB();
+ PR = P.getOperand(i);
+ }
+}
+
+
+unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
+ RegisterSet Defs;
+ HBS::getInstrDefs(*MI, Defs);
+ if (Defs.count() != 1)
+ return 0;
+ return Defs.find_first();
+}
+
+
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
+ if (!BTP->has(Reg))
+ return false;
+ const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
+ for (unsigned i = 0, w = RC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (!V.is(0) && !V.is(1))
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
+ unsigned DefR) const {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::COPY:
+ case Hexagon::S2_lsr_i_r:
+ case Hexagon::S2_asr_i_r:
+ case Hexagon::S2_asl_i_r:
+ case Hexagon::S2_lsr_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_insert:
+ case Hexagon::A2_or:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_and:
+ case Hexagon::A2_andp:
+ case Hexagon::A2_combinew:
+ case Hexagon::A4_combineri:
+ case Hexagon::A4_combineir:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ case Hexagon::A2_combine_ll:
+ case Hexagon::A2_combine_lh:
+ case Hexagon::A2_combine_hl:
+ case Hexagon::A2_combine_hh:
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
+ unsigned InpR) const {
+ for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+ const MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg())
+ continue;
+ if (Op.getReg() == InpR)
+ return i == n-1;
+ }
+ return false;
+}
+
+
+bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
+ if (!BTP->has(OutR) || !BTP->has(InpR))
+ return false;
+ const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
+ for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
+ const BitTracker::BitValue &V = OutC[i];
+ if (V.Type != BitTracker::BitValue::Ref)
+ continue;
+ if (V.RefI.Reg != InpR)
+ return false;
+ }
+ return true;
+}
+
+
+bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
+ unsigned OutR2, unsigned &InpR2) const {
+ if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
+ return false;
+ const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
+ const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
+ unsigned W = OutC1.width();
+ unsigned MatchR = 0;
+ if (W != OutC2.width())
+ return false;
+ for (unsigned i = 0; i < W; ++i) {
+ const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
+ if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
+ return false;
+ if (V1.Type != BitTracker::BitValue::Ref)
+ continue;
+ if (V1.RefI.Pos != V2.RefI.Pos)
+ return false;
+ if (V1.RefI.Reg != InpR1)
+ return false;
+ if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
+ return false;
+ if (!MatchR)
+ MatchR = V2.RefI.Reg;
+ else if (V2.RefI.Reg != MatchR)
+ return false;
+ }
+ InpR2 = MatchR;
+ return true;
+}
+
+
+void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
+ MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
+ unsigned NewPredR) {
+ DenseMap<unsigned,unsigned> RegMap;
+
+ const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
+ unsigned PhiR = MRI->createVirtualRegister(PhiRC);
+ BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
+ .addReg(NewPredR)
+ .addMBB(&PB)
+ .addReg(G.Inp.Reg)
+ .addMBB(&LB);
+ RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
+
+ for (unsigned i = G.Ins.size(); i > 0; --i) {
+ const MachineInstr *SI = G.Ins[i-1];
+ unsigned DR = getDefReg(SI);
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ unsigned NewDR = MRI->createVirtualRegister(RC);
+ DebugLoc DL = SI->getDebugLoc();
+
+ auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
+ for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
+ const MachineOperand &Op = SI->getOperand(j);
+ if (!Op.isReg()) {
+ MIB.addOperand(Op);
+ continue;
+ }
+ if (!Op.isUse())
+ continue;
+ unsigned UseR = RegMap[Op.getReg()];
+ MIB.addReg(UseR, 0, Op.getSubReg());
+ }
+ RegMap.insert(std::make_pair(DR, NewDR));
+ }
+
+ HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
+}
+
+
+bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
+ DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
+ std::vector<PhiInfo> Phis;
+ for (auto &I : *C.LB) {
+ if (!I.isPHI())
+ break;
+ unsigned PR = getDefReg(&I);
+ if (isConst(PR))
+ continue;
+ bool BadUse = false, GoodUse = false;
+ for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseI = UI->getParent();
+ if (UseI->getParent() != C.LB) {
+ BadUse = true;
+ break;
+ }
+ if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
+ GoodUse = true;
+ }
+ if (BadUse || !GoodUse)
+ continue;
+
+ Phis.push_back(PhiInfo(I, *C.LB));
+ }
+
+ DEBUG({
+ dbgs() << "Phis: {";
+ for (auto &I : Phis) {
+ dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
+ << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
+ << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
+ << I.LB->getNumber() << ')';
+ }
+ dbgs() << " }\n";
+ });
+
+ if (Phis.empty())
+ return false;
+
+ bool Changed = false;
+ InstrList ShufIns;
+
+ // Go backwards in the block: for each bit shuffling instruction, check
+ // if that instruction could potentially be moved to the front of the loop:
+ // the output of the instruction cannot be used in a non-shuffling instruction
+ // in this loop.
+ for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
+ if (I->isTerminator())
+ continue;
+ if (I->isPHI())
+ break;
+
+ RegisterSet Defs;
+ HBS::getInstrDefs(*I, Defs);
+ if (Defs.count() != 1)
+ continue;
+ unsigned DefR = Defs.find_first();
+ if (!TargetRegisterInfo::isVirtualRegister(DefR))
+ continue;
+ if (!isBitShuffle(&*I, DefR))
+ continue;
+
+ bool BadUse = false;
+ for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseI = UI->getParent();
+ if (UseI->getParent() == C.LB) {
+ if (UseI->isPHI()) {
+ // If the use is in a phi node in this loop, then it should be
+ // the value corresponding to the back edge.
+ unsigned Idx = UI.getOperandNo();
+ if (UseI->getOperand(Idx+1).getMBB() != C.LB)
+ BadUse = true;
+ } else {
+ auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI);
+ if (F == ShufIns.end())
+ BadUse = true;
+ }
+ } else {
+ // A use outside of the loop requires an epilog block suitable for a
+ // copy-out; without one, the instruction cannot be moved.
+ if (C.EB == nullptr)
+ BadUse = true;
+ }
+ if (BadUse)
+ break;
+ }
+
+ if (BadUse)
+ continue;
+ ShufIns.push_back(&*I);
+ }
+
+ // Partition the list of shuffling instructions into instruction groups,
+ // where each group has to be moved as a whole (i.e. a group is a chain of
+ // dependent instructions). A group produces a single live output register,
+ // which is meant to be the input of the loop phi node (although this is
+ // not checked here yet). It also uses a single register as its input,
+ // which is some value produced in the loop body. After moving the group
+ // to the beginning of the loop, that input register would need to be
+ // the loop-carried register (through a phi node) instead of the (currently
+ // loop-carried) output register.
+ typedef std::vector<InstrGroup> InstrGroupList;
+ InstrGroupList Groups;
+
+ for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
+ MachineInstr *SI = ShufIns[i];
+ if (SI == nullptr)
+ continue;
+
+ InstrGroup G;
+ G.Ins.push_back(SI);
+ G.Out.Reg = getDefReg(SI);
+ RegisterSet Inputs;
+ HBS::getInstrUses(*SI, Inputs);
+
+ for (unsigned j = i+1; j < n; ++j) {
+ MachineInstr *MI = ShufIns[j];
+ if (MI == nullptr)
+ continue;
+ RegisterSet Defs;
+ HBS::getInstrDefs(*MI, Defs);
+ // If this instruction does not define any pending inputs, skip it.
+ if (!Defs.intersects(Inputs))
+ continue;
+ // Otherwise, add it to the current group and remove the inputs that
+ // are defined by MI.
+ G.Ins.push_back(MI);
+ Inputs.remove(Defs);
+ // Then add all registers used by MI.
+ HBS::getInstrUses(*MI, Inputs);
+ ShufIns[j] = nullptr;
+ }
+
+ // Only add a group if it requires at most one register.
+ if (Inputs.count() > 1)
+ continue;
+ auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+ return G.Out.Reg == P.LR.Reg;
+ };
+ if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
+ continue;
+
+ G.Inp.Reg = Inputs.find_first();
+ Groups.push_back(G);
+ }
+
+ DEBUG({
+ for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+ InstrGroup &G = Groups[i];
+ dbgs() << "Group[" << i << "] inp: "
+ << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
+ << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
+ for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
+ dbgs() << " " << *G.Ins[j];
+ }
+ });
+
+ for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+ InstrGroup &G = Groups[i];
+ if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
+ continue;
+ auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+ return G.Out.Reg == P.LR.Reg;
+ };
+ auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
+ if (F == Phis.end())
+ continue;
+ unsigned PredR = 0;
+ if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
+ const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
+ unsigned Opc = DefPredR->getOpcode();
+ if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
+ continue;
+ if (!DefPredR->getOperand(1).isImm())
+ continue;
+ if (DefPredR->getOperand(1).getImm() != 0)
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
+ if (RC != MRI->getRegClass(F->PR.Reg)) {
+ PredR = MRI->createVirtualRegister(RC);
+ unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
+ : Hexagon::A2_tfrpi;
+ auto T = C.PB->getFirstTerminator();
+ DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
+ BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
+ .addImm(0);
+ } else {
+ PredR = F->PR.Reg;
+ }
+ }
+ assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
+ moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ HII = HST.getInstrInfo();
+ HRI = HST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+ BitTracker BT(HE, MF);
+ DEBUG(BT.trace(true));
+ BT.run();
+ BTP = &BT;
+
+ std::vector<LoopCand> Cand;
+
+ for (auto &B : MF) {
+ if (B.pred_size() != 2 || B.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PB = nullptr;
+ bool IsLoop = false;
+ for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
+ if (*PI != &B)
+ PB = *PI;
+ else
+ IsLoop = true;
+ }
+ if (!IsLoop)
+ continue;
+
+ MachineBasicBlock *EB = nullptr;
+ for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
+ if (*SI == &B)
+ continue;
+ // Set EB to the epilog block, if it has only one predecessor (i.e. the
+ // edge from B to EB is non-critical).
+ if ((*SI)->pred_size() == 1)
+ EB = *SI;
+ break;
+ }
+
+ Cand.push_back(LoopCand(&B, PB, EB));
+ }
+
+ bool Changed = false;
+ for (auto &C : Cand)
+ Changed |= processLoop(C);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonLoopRescheduling() {
+ return new HexagonLoopRescheduling();
+}
+
+FunctionPass *llvm::createHexagonBitSimplify() {
+ return new HexagonBitSimplify();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 021e58a..d5848dc 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -84,6 +84,8 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
switch (ID) {
case DoubleRegsRegClassID:
+ case VecDblRegsRegClassID:
+ case VecDblRegs128BRegClassID:
return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
: BT::BitMask(RW, 2*RW-1);
default:
@@ -95,30 +97,29 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
llvm_unreachable("Unexpected register/subregister");
}
-
namespace {
- struct RegisterRefs : public std::vector<BT::RegisterRef> {
- typedef std::vector<BT::RegisterRef> Base;
- RegisterRefs(const MachineInstr *MI);
- const BT::RegisterRef &operator[](unsigned n) const {
- // The main purpose of this operator is to assert with bad argument.
- assert(n < size());
- return Base::operator[](n);
- }
- };
+class RegisterRefs {
+ std::vector<BT::RegisterRef> Vector;
- RegisterRefs::RegisterRefs(const MachineInstr *MI)
- : Base(MI->getNumOperands()) {
- for (unsigned i = 0, n = size(); i < n; ++i) {
+public:
+ RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) {
+ for (unsigned i = 0, n = Vector.size(); i < n; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg())
- at(i) = BT::RegisterRef(MO);
+ Vector[i] = BT::RegisterRef(MO);
// For indices that don't correspond to registers, the entry will
// remain constructed via the default constructor.
}
}
-}
+ size_t size() const { return Vector.size(); }
+ const BT::RegisterRef &operator[](unsigned n) const {
+ // The main purpose of this operator is to assert with bad argument.
+ assert(n < Vector.size());
+ return Vector[n];
+ }
+};
+}
bool HexagonEvaluator::evaluate(const MachineInstr *MI,
const CellMapType &Inputs, CellMapType &Outputs) const {
@@ -189,7 +190,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI,
return true;
};
// Get the cell corresponding to the N-th operand.
- auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W)
+ auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W)
-> BT::RegisterCell {
const MachineOperand &Op = MI->getOperand(N);
if (Op.isImm())
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 3753b745..efafdd0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -102,7 +102,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block.
MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
@@ -186,13 +186,11 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
if (case1 || case2) {
InvertAndChangeJumpTarget(MI, UncondTarget);
- MBB->removeSuccessor(JumpAroundTarget);
- MBB->addSuccessor(UncondTarget);
+ MBB->replaceSuccessor(JumpAroundTarget, UncondTarget);
// Remove the unconditional branch in LayoutSucc.
LayoutSucc->erase(LayoutSucc->begin());
- LayoutSucc->removeSuccessor(UncondTarget);
- LayoutSucc->addSuccessor(JumpAroundTarget);
+ LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget);
// This code performs the conversion for case 2, which moves
// the block to the fall-thru case (BB3 in the code above).
@@ -210,16 +208,15 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// The live-in to LayoutSucc is now all values live-in to
// JumpAroundTarget.
//
- std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
- LayoutSucc->livein_end());
- std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
- JumpAroundTarget->livein_end());
- for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
- LayoutSucc->removeLiveIn(OrigLiveIn[i]);
- }
- for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
- LayoutSucc->addLiveIn(NewLiveIn[i]);
- }
+ std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn(
+ LayoutSucc->livein_begin(), LayoutSucc->livein_end());
+ std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn(
+ JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (const auto &OrigLI : OrigLiveIn)
+ LayoutSucc->removeLiveIn(OrigLI.PhysReg);
+ for (const auto &NewLI : NewLiveIn)
+ LayoutSucc->addLiveIn(NewLI);
}
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 9f5fac1..931db66 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -59,30 +59,23 @@ namespace {
// Numbering map for gep nodes. Used to keep track of ordering for
// gep nodes.
- struct NodeNumbering : public std::map<const GepNode*,unsigned> {
- };
-
- struct NodeOrdering : public NodeNumbering {
+ struct NodeOrdering {
NodeOrdering() : LastNum(0) {}
-#ifdef _MSC_VER
- void special_insert_for_special_msvc(const GepNode *N)
-#else
- using NodeNumbering::insert;
- void insert(const GepNode* N)
-#endif
- {
- insert(std::make_pair(N, ++LastNum));
- }
- bool operator() (const GepNode* N1, const GepNode *N2) const {
- const_iterator F1 = find(N1), F2 = find(N2);
- assert(F1 != end() && F2 != end());
+
+ void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); }
+ void clear() { Map.clear(); }
+
+ bool operator()(const GepNode *N1, const GepNode *N2) const {
+ auto F1 = Map.find(N1), F2 = Map.find(N2);
+ assert(F1 != Map.end() && F2 != Map.end());
return F1->second < F2->second;
}
+
private:
+ std::map<const GepNode *, unsigned> Map;
unsigned LastNum;
};
-
class HexagonCommonGEP : public FunctionPass {
public:
static char ID;
@@ -360,11 +353,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
Us.insert(&UI.getUse());
}
Nodes.push_back(N);
-#ifdef _MSC_VER
- NodeOrder.special_insert_for_special_msvc(N);
-#else
NodeOrder.insert(N);
-#endif
// Skip the first index operand, since we only handle 0. This dereferences
// the pointer operand.
@@ -379,11 +368,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
Nx->PTy = PtrTy;
Nx->Idx = Op;
Nodes.push_back(Nx);
-#ifdef _MSC_VER
- NodeOrder.special_insert_for_special_msvc(Nx);
-#else
NodeOrder.insert(Nx);
-#endif
PN = Nx;
PtrTy = next_type(PtrTy, Op);
@@ -404,7 +389,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
void HexagonCommonGEP::collect() {
// Establish depth-first traversal order of the dominator tree.
ValueVect BO;
- getBlockTraversalOrder(Fn->begin(), BO);
+ getBlockTraversalOrder(&Fn->front(), BO);
// The creation of gep nodes requires DT-traversal. When processing a GEP
// instruction that uses another GEP instruction as the base pointer, the
@@ -737,7 +722,7 @@ namespace {
Instruction *In = cast<Instruction>(V);
if (In->getParent() != B)
continue;
- BasicBlock::iterator It = In;
+ BasicBlock::iterator It = In->getIterator();
if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd))
FirstUse = It;
}
@@ -1135,7 +1120,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
ArrayRef<Value*> A(IdxList, IdxC);
Type *InpTy = Input->getType();
Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
- NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At);
+ NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
Input = NewInst;
} while (nax <= Num);
@@ -1213,7 +1198,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
Last = Child;
} while (true);
- BasicBlock::iterator InsertAt = LastB->getTerminator();
+ BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator();
if (LastUsed || LastCN > 0) {
ValueVect Urs;
getAllUsersForNode(Root, Urs, NCM);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
new file mode 100644
index 0000000..ee0c318
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -0,0 +1,1063 @@
+//===--- HexagonEarlyIfConv.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a Hexagon-specific if-conversion pass that runs on the
+// SSA form.
+// In SSA it is not straightforward to represent instructions that
+// conditionally define registers, since a conditionally-defined register
+// may only be used under the same condition on which the definition was
+// based. To avoid complications of this nature, this pass will only
+// generate predicated stores, and speculate other instructions from the
+// "if-converted" block.
+// The code will recognize CFG patterns where a block with a conditional
+// branch "splits" into a "true block" and a "false block". Either of these
+// could be omitted (in case of a triangle, for example).
+// If after conversion of the side block(s) the CFG allows it, the
+// resulting blocks may be merged. If the "join" block contained PHI nodes,
+// they will be replaced with MUX (or MUX-like) instructions to maintain
+// the semantics of the PHI.
+//
+// Example:
+//
+// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
+// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
+// J2_jumpt %vreg41<kill>, <BB#5>, %PC<imp-def,dead>
+// J2_jump <BB#4>, %PC<imp-def,dead>
+// Successors according to CFG: BB#4(62) BB#5(62)
+//
+// BB#4: derived from LLVM BB %if.then
+// Predecessors according to CFG: BB#3
+// %vreg11<def> = A2_addp %vreg6, %vreg10
+// S2_storerd_io %vreg32, 16, %vreg11
+// Successors according to CFG: BB#5
+//
+// BB#5: derived from LLVM BB %if.end
+// Predecessors according to CFG: BB#3 BB#4
+// %vreg12<def> = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
+// %vreg13<def> = A2_addp %vreg7, %vreg12
+// %vreg42<def> = C2_cmpeqi %vreg9, 10
+// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
+// J2_jump <BB#6>, %PC<imp-def,dead>
+// Successors according to CFG: BB#6(4) BB#3(124)
+//
+// would become:
+//
+// %vreg40<def> = L2_loadrub_io %vreg39<kill>, 1
+// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
+// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10
+// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
+// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11
+// %vreg13<def> = A2_addp %vreg7, %vreg46
+// %vreg42<def> = C2_cmpeqi %vreg9, 10
+// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
+// J2_jump <BB#6>, %PC<imp-def,dead>
+// Successors according to CFG: BB#6 BB#3
+
+#define DEBUG_TYPE "hexagon-eif"
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "HexagonTargetMachine.h"
+
+#include <functional>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonEarlyIfConversion();
+ void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry);
+}
+
+namespace {
+ cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
+ cl::init(false), cl::desc("Enable branch probability info"));
+ cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
+ cl::desc("Size limit in Hexagon early if-conversion"));
+
+ struct PrintMB {
+ PrintMB(const MachineBasicBlock *B) : MB(B) {}
+ const MachineBasicBlock *MB;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) {
+ if (!P.MB)
+ return OS << "<none>";
+ return OS << '#' << P.MB->getNumber();
+ }
+
+ struct FlowPattern {
+ FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {}
+ FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB,
+ MachineBasicBlock *FB, MachineBasicBlock *JB)
+ : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {}
+
+ MachineBasicBlock *SplitB;
+ MachineBasicBlock *TrueB, *FalseB, *JoinB;
+ unsigned PredR;
+ };
+ struct PrintFP {
+ PrintFP(const FlowPattern &P, const TargetRegisterInfo &T)
+ : FP(P), TRI(T) {}
+ const FlowPattern &FP;
+ const TargetRegisterInfo &TRI;
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
+ };
+ raw_ostream &operator<<(raw_ostream &OS,
+ const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+ raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
+ OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
+ << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
+ << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
+ << PrintMB(P.FP.FalseB)
+ << ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
+ return OS;
+ }
+
+ class HexagonEarlyIfConversion : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonEarlyIfConversion() : MachineFunctionPass(ID),
+ TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) {
+ initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon early if conversion";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ typedef DenseSet<MachineBasicBlock*> BlockSetType;
+
+ bool isPreheader(const MachineBasicBlock *B) const;
+ bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L,
+ FlowPattern &FP);
+ bool visitBlock(MachineBasicBlock *B, MachineLoop *L);
+ bool visitLoop(MachineLoop *L);
+
+ bool hasEHLabel(const MachineBasicBlock *B) const;
+ bool hasUncondBranch(const MachineBasicBlock *B) const;
+ bool isValidCandidate(const MachineBasicBlock *B) const;
+ bool usesUndefVReg(const MachineInstr *MI) const;
+ bool isValid(const FlowPattern &FP) const;
+ unsigned countPredicateDefs(const MachineBasicBlock *B) const;
+ unsigned computePhiCost(MachineBasicBlock *B) const;
+ bool isProfitable(const FlowPattern &FP) const;
+ bool isPredicableStore(const MachineInstr *MI) const;
+ bool isSafeToSpeculate(const MachineInstr *MI) const;
+
+ unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const;
+ void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At,
+ MachineInstr *MI, unsigned PredR, bool IfTrue);
+ void predicateBlockNB(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+ unsigned PredR, bool IfTrue);
+
+ void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
+ void convert(const FlowPattern &FP);
+
+ void removeBlock(MachineBasicBlock *B);
+ void eliminatePhis(MachineBasicBlock *B);
+ void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB);
+ void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
+ void simplifyFlowGraph(const FlowPattern &FP);
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineFunction *MFN;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *MDT;
+ MachineLoopInfo *MLI;
+ BlockSetType Deleted;
+ const MachineBranchProbabilityInfo *MBPI;
+ };
+
+ char HexagonEarlyIfConversion::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif",
+ "Hexagon early if conversion", false, false)
+
+bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
+ if (B->succ_size() != 1)
+ return false;
+ MachineBasicBlock *SB = *B->succ_begin();
+ MachineLoop *L = MLI->getLoopFor(SB);
+ return L && SB == L->getHeader();
+}
+
+
+bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
+ MachineLoop *L, FlowPattern &FP) {
+ DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n");
+
+ // Interested only in conditional branches, no .new, no new-value, etc.
+ // Check the terminators directly, it's easier than handling all responses
+ // from AnalyzeBranch.
+ MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock::const_iterator T1I = B->getFirstTerminator();
+ if (T1I == B->end())
+ return false;
+ unsigned Opc = T1I->getOpcode();
+ if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf)
+ return false;
+ unsigned PredR = T1I->getOperand(0).getReg();
+
+ // Get the layout successor, or 0 if B does not have one.
+ MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B));
+ MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0;
+
+ MachineBasicBlock *T1B = T1I->getOperand(1).getMBB();
+ MachineBasicBlock::const_iterator T2I = std::next(T1I);
+ // The second terminator should be an unconditional branch.
+ assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump);
+ MachineBasicBlock *T2B = (T2I == B->end()) ? NextB
+ : T2I->getOperand(0).getMBB();
+ if (T1B == T2B) {
+ // XXX merge if T1B == NextB, or convert branch to unconditional.
+ // mark as diamond with both sides equal?
+ return false;
+ }
+ // Loop could be null for both.
+ if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L)
+ return false;
+
+ // Record the true/false blocks in such a way that "true" means "if (PredR)",
+ // and "false" means "if (!PredR)".
+ if (Opc == Hexagon::J2_jumpt)
+ TB = T1B, FB = T2B;
+ else
+ TB = T2B, FB = T1B;
+
+ if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB))
+ return false;
+
+ // Detect triangle first. In case of a triangle, one of the blocks TB/FB
+ // can fall through into the other, in other words, it will be executed
+ // in both cases. We only want to predicate the block that is executed
+ // conditionally.
+ unsigned TNP = TB->pred_size(), FNP = FB->pred_size();
+ unsigned TNS = TB->succ_size(), FNS = FB->succ_size();
+
+ // A block is predicable if it has one predecessor (it must be B), and
+ // it has a single successor. In fact, the block has to end either with
+ // an unconditional branch (which can be predicated), or with a fall-
+ // through.
+ bool TOk = (TNP == 1) && (TNS == 1);
+ bool FOk = (FNP == 1) && (FNS == 1);
+
+ // If neither is predicable, there is nothing interesting.
+ if (!TOk && !FOk)
+ return false;
+
+ MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0;
+ MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0;
+ MachineBasicBlock *JB = 0;
+
+ if (TOk) {
+ if (FOk) {
+ if (TSB == FSB)
+ JB = TSB;
+ // Diamond: "if (P) then TB; else FB;".
+ } else {
+ // TOk && !FOk
+ if (TSB == FB) {
+ JB = FB;
+ FB = 0;
+ }
+ }
+ } else {
+ // !TOk && FOk (at least one must be true by now).
+ if (FSB == TB) {
+ JB = TB;
+ TB = 0;
+ }
+ }
+ // Don't try to predicate loop preheaders.
+ if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
+ DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB)
+ << " is a loop preheader. Skipping.\n");
+ return false;
+ }
+
+ FP = FlowPattern(B, PredR, TB, FB, JB);
+ DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n");
+ return true;
+}
+
+
+// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that
+// contains EH_LABEL.
+bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
+ for (auto &I : *B)
+ if (I.isEHLabel())
+ return true;
+ return false;
+}
+
+
+// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize
+// that a block can never fall-through.
+bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B)
+ const {
+ MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
+ while (I != E) {
+ if (I->isBarrier())
+ return true;
+ ++I;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
+ const {
+ if (!B)
+ return true;
+ if (B->isEHPad() || B->hasAddressTaken())
+ return false;
+ if (B->succ_size() == 0)
+ return false;
+
+ for (auto &MI : *B) {
+ if (MI.isDebugValue())
+ continue;
+ if (MI.isConditionalBranch())
+ return false;
+ unsigned Opc = MI.getOpcode();
+ bool IsJMP = (Opc == Hexagon::J2_jump);
+ if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI))
+ return false;
+ // Look for predicate registers defined by this instruction. It's ok
+ // to speculate such an instruction, but the predicate register cannot
+ // be used outside of this block (or else it won't be possible to
+ // update the use of it after predication). PHI uses will be updated
+ // to use a result of a MUX, and a MUX cannot be created for predicate
+ // registers.
+ for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass)
+ continue;
+ for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
+ if (U->getParent()->isPHI())
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isUse())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ const MachineInstr *DefI = MRI->getVRegDef(R);
+ // "Undefined" virtual registers are actually defined via IMPLICIT_DEF.
+ assert(DefI && "Expecting a reaching def in MRI");
+ if (DefI->isImplicitDef())
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
+ if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition
+ return false;
+ if (FP.TrueB && !isValidCandidate(FP.TrueB))
+ return false;
+ if (FP.FalseB && !isValidCandidate(FP.FalseB))
+ return false;
+ // Check the PHIs in the join block. If any of them use a register
+ // that is defined as IMPLICIT_DEF, do not convert this. This can
+ // legitimately happen if one side of the split never executes, but
+ // the compiler is unable to prove it. That side may then seem to
+ // provide an "undef" value to the join block, however it will never
+ // execute at run-time. If we convert this case, the "undef" will
+ // be used in a MUX instruction, and that may seem like actually
+ // using an undefined value to other optimizations. This could lead
+ // to trouble further down the optimization stream, cause assertions
+ // to fail, etc.
+ if (FP.JoinB) {
+ const MachineBasicBlock &B = *FP.JoinB;
+ for (auto &MI : B) {
+ if (!MI.isPHI())
+ break;
+ if (usesUndefVReg(&MI))
+ return false;
+ unsigned DefR = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+ if (RC == &Hexagon::PredRegsRegClass)
+ return false;
+ }
+ }
+ return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
+ assert(B->pred_size() <= 2);
+ if (B->pred_size() < 2)
+ return 0;
+
+ unsigned Cost = 0;
+ MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
+ for (I = B->begin(); I != E; ++I) {
+ const MachineOperand &RO1 = I->getOperand(1);
+ const MachineOperand &RO3 = I->getOperand(3);
+ assert(RO1.isReg() && RO3.isReg());
+ // Must have a MUX if the phi uses a subregister.
+ if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+ Cost++;
+ continue;
+ }
+ MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
+ MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+ if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
+ Cost++;
+ }
+ return Cost;
+}
+
+
+unsigned HexagonEarlyIfConversion::countPredicateDefs(
+ const MachineBasicBlock *B) const {
+ unsigned PredDefs = 0;
+ for (auto &MI : *B) {
+ for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned R = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+ PredDefs++;
+ }
+ }
+ return PredDefs;
+}
+
+
+bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+ if (FP.TrueB && FP.FalseB) {
+
+ // Do not if-convert if the branch is one-sided.
+ if (MBPI) {
+ BranchProbability Prob(9, 10);
+ if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
+ return false;
+ if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
+ return false;
+ }
+
+ // If both sides are predicable, convert them if they join, and the
+ // join block has no other predecessors.
+ MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
+ MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
+ if (TSB != FSB)
+ return false;
+ if (TSB->pred_size() != 2)
+ return false;
+ }
+
+ // Calculate the total size of the predicated blocks.
+ // Assume instruction counts without branches to be the approximation of
+ // the code size. If the predicated blocks are smaller than a packet size,
+ // approximate the spare room in the packet that could be filled with the
+ // predicated/speculated instructions.
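+ // For illustration (assuming HEXAGON_PACKET_SIZE is 4 and the default
+ // eif-limit of 6): a true block of 3 instructions and a false block of 2
+ // gives TotalIn = 5 and Spare = (4-3) + (4-2) = 3, so the size test
+ // below (TotalIn >= SizeLimit+Spare) does not reject the conversion.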
+ unsigned TS = 0, FS = 0, Spare = 0;
+ if (FP.TrueB) {
+ TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
+ if (TS < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-TS;
+ }
+ if (FP.FalseB) {
+ FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
+ if (FS < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-FS;
+ }
+ unsigned TotalIn = TS+FS;
+ DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
+ << TotalIn << ", spare room: " << Spare << "\n");
+ if (TotalIn >= SizeLimit+Spare)
+ return false;
+
+ // Count the number of PHI nodes that will need to be updated (converted
+ // to MUX). Those can be later converted to predicated instructions, so
+ // they aren't always adding extra cost.
+ // KLUDGE: Also, count the number of predicate register definitions in
+ // each block. The scheduler may increase the pressure of these and cause
+ // expensive spills (e.g. bitmnp01).
+ unsigned TotalPh = 0;
+ unsigned PredDefs = countPredicateDefs(FP.SplitB);
+ if (FP.JoinB) {
+ TotalPh = computePhiCost(FP.JoinB);
+ PredDefs += countPredicateDefs(FP.JoinB);
+ } else {
+ if (FP.TrueB && FP.TrueB->succ_size() > 0) {
+ MachineBasicBlock *SB = *FP.TrueB->succ_begin();
+ TotalPh += computePhiCost(SB);
+ PredDefs += countPredicateDefs(SB);
+ }
+ if (FP.FalseB && FP.FalseB->succ_size() > 0) {
+ MachineBasicBlock *SB = *FP.FalseB->succ_begin();
+ TotalPh += computePhiCost(SB);
+ PredDefs += countPredicateDefs(SB);
+ }
+ }
+ DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
+ << TotalPh << "\n");
+ if (TotalIn+TotalPh >= SizeLimit+Spare)
+ return false;
+
+ DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n");
+ if (PredDefs > 4)
+ return false;
+
+ return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
+ MachineLoop *L) {
+ bool Changed = false;
+
+ // Visit all dominated blocks from the same loop first, then process B.
+ MachineDomTreeNode *N = MDT->getNode(B);
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ // We will change CFG/DT during this traversal, so take precautions to
+ // avoid problems related to invalidated iterators. In fact, processing
+ // a child C of B cannot cause another child to be removed, but it can
+ // cause a new child to be added (which was a child of C before C itself
+ // was removed. This new child C, however, would have been processed
+ // prior to processing B, so there is no need to process it again.
+ // Simply keep a list of children of B, and traverse that list.
+ typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+ DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+ for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ if (!Deleted.count(SB))
+ Changed |= visitBlock(SB, L);
+ }
+ // When walking down the dominator tree, we want to traverse through
+ // blocks from nested (other) loops, because they can dominate blocks
+ // that are in L. Skip the non-L blocks only after the tree traversal.
+ if (MLI->getLoopFor(B) != L)
+ return Changed;
+
+ FlowPattern FP;
+ if (!matchFlowPattern(B, L, FP))
+ return Changed;
+
+ if (!isValid(FP)) {
+ DEBUG(dbgs() << "Conversion is not valid\n");
+ return Changed;
+ }
+ if (!isProfitable(FP)) {
+ DEBUG(dbgs() << "Conversion is not profitable\n");
+ return Changed;
+ }
+
+ convert(FP);
+ simplifyFlowGraph(FP);
+ return true;
+}
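
The comment in visitBlock describes a common pattern: snapshot a node's children before a traversal that may mutate the tree, so iterator invalidation cannot occur. A generic illustration with plain STL containers (no LLVM types), purely as a sketch:

#include <cstdio>
#include <vector>

// Iterate over a copy of the child list, so elements appended while the
// tree changes mid-walk (already handled elsewhere, as in visitBlock) are
// not revisited and no iterators are invalidated.
int main() {
  std::vector<int> children = {1, 2, 3};
  std::vector<int> snapshot(children);   // like DTNodeVectType Cn(...)
  for (int c : snapshot) {
    std::printf("visit %d\n", c);
    if (c == 2)
      children.push_back(4);             // the "tree" changed during the walk
  }
  // 4 was added but deliberately not visited in this pass over 'snapshot'.
}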
+
+
+bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
+ MachineBasicBlock *HB = L ? L->getHeader() : 0;
+ DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
+ : dbgs() << "Visiting function") << "\n");
+ bool Changed = false;
+ if (L) {
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ Changed |= visitLoop(*I);
+ }
+
+ MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN);
+ Changed |= visitBlock(L ? HB : EntryB, L);
+ return Changed;
+}
+
+
+bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
+ const {
+ // Exclude post-increment stores. Those return a value, so we cannot
+ // predicate them.
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+ switch (Opc) {
+ // Store byte:
+ case S2_storerb_io: case S4_storerb_rr:
+ case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp:
+ // Store halfword:
+ case S2_storerh_io: case S4_storerh_rr:
+ case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp:
+ // Store upper halfword:
+ case S2_storerf_io: case S4_storerf_rr:
+ case S2_storerfabs: case S2_storerfgp:
+ // Store word:
+ case S2_storeri_io: case S4_storeri_rr:
+ case S2_storeriabs: case S4_storeiri_io: case S2_storerigp:
+ // Store doubleword:
+ case S2_storerd_io: case S4_storerd_rr:
+ case S2_storerdabs: case S2_storerdgp:
+ return true;
+ }
+ return false;
+}
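
Post-increment stores are excluded because they produce a value (the updated address) in addition to their memory effect; if the store were predicated away, that result would still have to be defined somehow. A small C-level analogue, illustrative only:

#include <cstdio>

// A post-increment store both writes memory and returns the next address.
static int *store_postinc(int *p, int v) {
  *p = v;
  return p + 1;   // this result must exist even if the store is skipped
}

int main() {
  int buf[2] = {0, 0};
  int *q = store_postinc(buf, 7);
  // If the store were conditional and not taken, what should 'q' be?
  // That ambiguity is why such stores are not predicated by the pass.
  std::printf("%d %td\n", buf[0], q - buf);
}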
+
+
+bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
+ const {
+ if (MI->mayLoad() || MI->mayStore())
+ return false;
+ if (MI->isCall() || MI->isBarrier() || MI->isBranch())
+ return false;
+ if (MI->hasUnmodeledSideEffects())
+ return false;
+
+ return true;
+}
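
Speculation moves an instruction above the branch that guarded it, so only side-effect-free instructions qualify; a load, for example, may fault when the guard existed precisely to prevent that. A hedged C-level illustration:

#include <cstdio>

// Safe to speculate: pure arithmetic. Computing it on both paths is harmless
// even when the result ends up unused.
static int speculated_add(int a, int b) { return a + b; }

int main() {
  int *p = nullptr;
  int x = 0;
  // Original control flow: the load is guarded by the null check.
  if (p)
    x = *p;                       // hoisting this above 'if (p)' could fault
  else
    x = speculated_add(1, 2);     // this one could be hoisted freely
  std::printf("%d\n", x);
}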
+
+
+unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc,
+ bool IfTrue) const {
+ // Exclude post-increment stores.
+ using namespace Hexagon;
+ switch (Opc) {
+ case S2_storerb_io:
+ return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io;
+ case S4_storerb_rr:
+ return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr;
+ case S2_storerbabs:
+ case S2_storerbgp:
+ return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs;
+ case S4_storeirb_io:
+ return IfTrue ? S4_storeirbt_io : S4_storeirbf_io;
+ case S2_storerh_io:
+ return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io;
+ case S4_storerh_rr:
+ return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr;
+ case S2_storerhabs:
+ case S2_storerhgp:
+ return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs;
+ case S2_storerf_io:
+ return IfTrue ? S2_pstorerft_io : S2_pstorerff_io;
+ case S4_storerf_rr:
+ return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr;
+ case S2_storerfabs:
+ case S2_storerfgp:
+ return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs;
+ case S4_storeirh_io:
+ return IfTrue ? S4_storeirht_io : S4_storeirhf_io;
+ case S2_storeri_io:
+ return IfTrue ? S2_pstorerit_io : S2_pstorerif_io;
+ case S4_storeri_rr:
+ return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr;
+ case S2_storeriabs:
+ case S2_storerigp:
+ return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs;
+ case S4_storeiri_io:
+ return IfTrue ? S4_storeirit_io : S4_storeirif_io;
+ case S2_storerd_io:
+ return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io;
+ case S4_storerd_rr:
+ return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr;
+ case S2_storerdabs:
+ case S2_storerdgp:
+ return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs;
+ }
+ llvm_unreachable("Unexpected opcode");
+ return 0;
+}
+
+
+void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineInstr *MI,
+ unsigned PredR, bool IfTrue) {
+ DebugLoc DL;
+ if (At != ToB->end())
+ DL = At->getDebugLoc();
+ else if (!ToB->empty())
+ DL = ToB->back().getDebugLoc();
+
+ unsigned Opc = MI->getOpcode();
+
+ if (isPredicableStore(MI)) {
+ unsigned COpc = getCondStoreOpcode(Opc, IfTrue);
+ assert(COpc);
+ MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc))
+ .addReg(PredR);
+ for (MIOperands MO(MI); MO.isValid(); ++MO)
+ MIB.addOperand(*MO);
+
+ // Set memory references.
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ MI->eraseFromParent();
+ return;
+ }
+
+ if (Opc == Hexagon::J2_jump) {
+ MachineBasicBlock *TB = MI->getOperand(0).getMBB();
+ const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt
+ : Hexagon::J2_jumpf);
+ BuildMI(*ToB, At, DL, D)
+ .addReg(PredR)
+ .addMBB(TB);
+ MI->eraseFromParent();
+ return;
+ }
+
+ // Print the offending instruction unconditionally as we are about to
+ // abort.
+ dbgs() << *MI;
+ llvm_unreachable("Unexpected instruction");
+}
+
+
+// Predicate/speculate non-branch instructions from FromB into block ToB.
+// Leave the branches alone; they will be handled later. At this point,
+// FromB should have at most one branch, and it should be unconditional.
+void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
+ MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+ unsigned PredR, bool IfTrue) {
+ DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n");
+ MachineBasicBlock::iterator End = FromB->getFirstTerminator();
+ MachineBasicBlock::iterator I, NextI;
+
+ for (I = FromB->begin(); I != End; I = NextI) {
+ assert(!I->isPHI());
+ NextI = std::next(I);
+ if (isSafeToSpeculate(&*I))
+ ToB->splice(At, FromB, I);
+ else
+ predicateInstr(ToB, At, &*I, PredR, IfTrue);
+ }
+}
+
+
+void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
+ const FlowPattern &FP) {
+ // Visit all PHI nodes in the WhereB block and generate MUX instructions
+ // in the split block. Update the PHI nodes with the values of the MUX.
+ auto NonPHI = WhereB->getFirstNonPHI();
+ for (auto I = WhereB->begin(); I != NonPHI; ++I) {
+ MachineInstr *PN = &*I;
+ // Registers and subregisters corresponding to TrueB, FalseB and SplitB.
+ unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0;
+ for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
+ const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1);
+ if (BO.getMBB() == FP.SplitB)
+ SR = RO.getReg(), SSR = RO.getSubReg();
+ else if (BO.getMBB() == FP.TrueB)
+ TR = RO.getReg(), TSR = RO.getSubReg();
+ else if (BO.getMBB() == FP.FalseB)
+ FR = RO.getReg(), FSR = RO.getSubReg();
+ else
+ continue;
+ PN->RemoveOperand(i+1);
+ PN->RemoveOperand(i);
+ }
+ if (TR == 0)
+ TR = SR, TSR = SSR;
+ else if (FR == 0)
+ FR = SR, FSR = SSR;
+ assert(TR && FR);
+
+ using namespace Hexagon;
+ unsigned DR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ const MCInstrDesc &D = RC == &IntRegsRegClass ? TII->get(C2_mux)
+ : TII->get(MUX64_rr);
+
+ MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
+ DebugLoc DL;
+ if (MuxAt != FP.SplitB->end())
+ DL = MuxAt->getDebugLoc();
+ unsigned MuxR = MRI->createVirtualRegister(RC);
+ BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR)
+ .addReg(FP.PredR)
+ .addReg(TR, 0, TSR)
+ .addReg(FR, 0, FSR);
+
+ PN->addOperand(MachineOperand::CreateReg(MuxR, false));
+ PN->addOperand(MachineOperand::CreateMBB(FP.SplitB));
+ }
+}
+
+
+void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
+ MachineBasicBlock *TSB = 0, *FSB = 0;
+ MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator();
+ assert(OldTI != FP.SplitB->end());
+ DebugLoc DL = OldTI->getDebugLoc();
+
+ if (FP.TrueB) {
+ TSB = *FP.TrueB->succ_begin();
+ predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true);
+ }
+ if (FP.FalseB) {
+ FSB = *FP.FalseB->succ_begin();
+ MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator();
+ predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false);
+ }
+
+ // Regenerate new terminators in the split block and update the successors.
+ // First, remember any information that may be needed later and remove the
+ // existing terminators/successors from the split block.
+ MachineBasicBlock *SSB = 0;
+ FP.SplitB->erase(OldTI, FP.SplitB->end());
+ while (FP.SplitB->succ_size() > 0) {
+ MachineBasicBlock *T = *FP.SplitB->succ_begin();
+ // It's possible that the split block had a successor that is not a
+ // predicated block. This could only happen if there was only one block
+ // to be predicated. Example:
+ // split_b:
+ // if (p) jump true_b
+ // jump unrelated2_b
+ // unrelated1_b:
+ // ...
+ // unrelated2_b: ; can have other predecessors, so it's not "false_b"
+ // jump other_b
+ // true_b: ; only reachable from split_b, can be predicated
+ // ...
+ //
+ // Find this successor (SSB) if it exists.
+ if (T != FP.TrueB && T != FP.FalseB) {
+ assert(!SSB);
+ SSB = T;
+ }
+ FP.SplitB->removeSuccessor(FP.SplitB->succ_begin());
+ }
+
+ // Insert new branches and update the successors of the split block. This
+ // may create unconditional branches to the layout successor, etc., but
+ // that will be cleaned up later. For now, make sure that correct code is
+ // generated.
+ if (FP.JoinB) {
+ assert(!SSB || SSB == FP.JoinB);
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ .addMBB(FP.JoinB);
+ FP.SplitB->addSuccessor(FP.JoinB);
+ } else {
+ bool HasBranch = false;
+ if (TSB) {
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt))
+ .addReg(FP.PredR)
+ .addMBB(TSB);
+ FP.SplitB->addSuccessor(TSB);
+ HasBranch = true;
+ }
+ if (FSB) {
+ const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump)
+ : TII->get(Hexagon::J2_jumpf);
+ MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D);
+ if (!HasBranch)
+ MIB.addReg(FP.PredR);
+ MIB.addMBB(FSB);
+ FP.SplitB->addSuccessor(FSB);
+ }
+ if (SSB) {
+ // This cannot happen if both TSB and FSB are set. [TF]SB are the
+ // successor blocks of the TrueB and FalseB (or null if the TrueB
+ // or FalseB block is null). SSB is the potential successor block
+ // of the SplitB that is neither TrueB nor FalseB.
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ .addMBB(SSB);
+ FP.SplitB->addSuccessor(SSB);
+ }
+ }
+
+ // What is left to do is to update the PHI nodes that could have entries
+ // referring to predicated blocks.
+ if (FP.JoinB) {
+ updatePhiNodes(FP.JoinB, FP);
+ } else {
+ if (TSB)
+ updatePhiNodes(TSB, FP);
+ if (FSB)
+ updatePhiNodes(FSB, FP);
+ // Nothing to update in SSB, since SSB's predecessors haven't changed.
+ }
+}
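
At the source level, the flow pattern that convert() collapses is an if/else diamond whose sides are small enough to predicate; after conversion the split block contains both sides plus a mux for any joined value. A rough C analogue of the before/after shape, not part of the patch:

#include <cstdio>

// Before: a branchy diamond.
static int diamond(bool p, int a, int b) {
  int x;
  if (p)
    x = a * 2;       // TrueB
  else
    x = b + 1;       // FalseB
  return x;          // JoinB: x = PHI(a*2, b+1)
}

// After (conceptually): straight-line code; both sides run as predicated or
// speculated work, and a mux-like select picks the result where the PHI was.
static int ifConverted(bool p, int a, int b) {
  int t = a * 2;     // speculated from TrueB
  int f = b + 1;     // speculated from FalseB
  return p ? t : f;  // mux inserted in the split block
}

int main() {
  std::printf("%d %d\n", diamond(true, 3, 5), ifConverted(true, 3, 5));
  std::printf("%d %d\n", diamond(false, 3, 5), ifConverted(false, 3, 5));
}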
+
+
+void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
+ DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
+
+ // Transfer the immediate dominator information from B to its descendants.
+ MachineDomTreeNode *N = MDT->getNode(B);
+ MachineDomTreeNode *IDN = N->getIDom();
+ if (IDN) {
+ MachineBasicBlock *IDB = IDN->getBlock();
+ typedef GraphTraits<MachineDomTreeNode*> GTN;
+ typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+ DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+ for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+ MachineBasicBlock *SB = (*I)->getBlock();
+ MDT->changeImmediateDominator(SB, IDB);
+ }
+ }
+
+ while (B->succ_size() > 0)
+ B->removeSuccessor(B->succ_begin());
+
+ for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
+ (*I)->removeSuccessor(B, true);
+
+ Deleted.insert(B);
+ MDT->eraseNode(B);
+ MFN->erase(B->getIterator());
+}
+
+
+void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
+ DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
+ MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
+ for (I = B->begin(); I != NonPHI; I = NextI) {
+ NextI = std::next(I);
+ MachineInstr *PN = &*I;
+ assert(PN->getNumOperands() == 3 && "Invalid phi node");
+ MachineOperand &UO = PN->getOperand(1);
+ unsigned UseR = UO.getReg(), UseSR = UO.getSubReg();
+ unsigned DefR = PN->getOperand(0).getReg();
+ unsigned NewR = UseR;
+ if (UseSR) {
+ // MRI.replaceVregUsesWith does not allow updating the subregister,
+ // so instead of doing the use-iteration here, create a copy into a
+ // "non-subregistered" register.
+ DebugLoc DL = PN->getDebugLoc();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+ NewR = MRI->createVirtualRegister(RC);
+ NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR)
+ .addReg(UseR, 0, UseSR);
+ }
+ MRI->replaceRegWith(DefR, NewR);
+ B->erase(I);
+ }
+}
+
+
+void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
+ MachineBasicBlock *NewB) {
+ for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *SB = *I;
+ MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
+ for (P = SB->begin(); P != N; ++P) {
+ MachineInstr *PN = &*P;
+ for (MIOperands MO(PN); MO.isValid(); ++MO)
+ if (MO->isMBB() && MO->getMBB() == OldB)
+ MO->setMBB(NewB);
+ }
+ }
+}
+
+
+void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
+ MachineBasicBlock *SuccB) {
+ DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
+ << PrintMB(SuccB) << "\n");
+ bool TermOk = hasUncondBranch(SuccB);
+ eliminatePhis(SuccB);
+ TII->RemoveBranch(*PredB);
+ PredB->removeSuccessor(SuccB);
+ PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
+ MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
+ for (I = SuccB->succ_begin(); I != E; ++I)
+ PredB->addSuccessor(*I);
+ PredB->normalizeSuccProbs();
+ replacePhiEdges(SuccB, PredB);
+ removeBlock(SuccB);
+ if (!TermOk)
+ PredB->updateTerminator();
+}
+
+
+void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
+ if (FP.TrueB)
+ removeBlock(FP.TrueB);
+ if (FP.FalseB)
+ removeBlock(FP.FalseB);
+
+ FP.SplitB->updateTerminator();
+ if (FP.SplitB->succ_size() != 1)
+ return;
+
+ MachineBasicBlock *SB = *FP.SplitB->succ_begin();
+ if (SB->pred_size() != 1)
+ return;
+
+ // By now, the split block has only one successor (SB), and SB has only
+ // one predecessor. We can try to merge them. We will need to update the
+ // terminators in FP.SplitB and SB, and that requires a working AnalyzeBranch,
+ // which fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
+ // with an unconditional branch, we won't need to touch the terminators.
+ if (!hasEHLabel(SB) || hasUncondBranch(SB))
+ mergeBlocks(FP.SplitB, SB);
+}
+
+
+bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
+ auto &ST = MF.getSubtarget();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MFN = &MF;
+ MRI = &MF.getRegInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() :
+ nullptr;
+
+ Deleted.clear();
+ bool Changed = false;
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+ Changed |= visitLoop(*I);
+ Changed |= visitLoop(0);
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+FunctionPass *llvm::createHexagonEarlyIfConversion() {
+ return new HexagonEarlyIfConversion();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index e4c8d8f..6e2dbc0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -74,7 +74,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block.
for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
++MII) {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 21a8996..7a52a1c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -147,6 +147,48 @@ static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
"shrink-wraps"));
+static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
+ cl::Hidden, cl::desc("Use allocframe more conservatively"));
+
+
+namespace llvm {
+ void initializeHexagonCallFrameInformationPass(PassRegistry&);
+ FunctionPass *createHexagonCallFrameInformation();
+}
+
+namespace {
+ class HexagonCallFrameInformation : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonCallFrameInformation() : MachineFunctionPass(ID) {
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeHexagonCallFrameInformationPass(PR);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ };
+
+ char HexagonCallFrameInformation::ID = 0;
+}
+
+bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
+ auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
+ bool NeedCFI = MF.getMMI().hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+
+ if (!NeedCFI)
+ return false;
+ HFI.insertCFIInstructions(MF);
+ return true;
+}
+
+INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
+ "Hexagon call frame information", false, false)
+
+FunctionPass *llvm::createHexagonCallFrameInformation() {
+ return new HexagonCallFrameInformation();
+}
+
+
namespace {
/// Map a register pair Reg to the subregister that has the greater "number",
/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
@@ -370,11 +412,11 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
insertEpilogueInBlock(*EpilogB);
} else {
for (auto &B : MF)
- if (!B.empty() && B.back().isReturn())
+ if (B.isReturnBlock())
insertCSRRestoresInBlock(B, CSI, HRI);
for (auto &B : MF)
- if (!B.empty() && B.back().isReturn())
+ if (B.isReturnBlock())
insertEpilogueInBlock(B);
}
}
@@ -383,10 +425,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
- auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
DebugLoc dl;
@@ -405,10 +444,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
bool AlignStack = (MaxAlign > getStackAlignment());
- // Check if frame moves are needed for EH.
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- MF.getFunction()->needsUnwindTableEntry();
-
// Get the number of bytes to allocate from the FrameInfo.
unsigned NumBytes = MFI->getStackSize();
unsigned SP = HRI.getStackRegister();
@@ -424,14 +459,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
MI->eraseFromParent();
}
- //
- // Only insert ALLOCFRAME if we need to or at -O0 for the debugger. Think
- // that this shouldn't be required, but doing so now because gcc does and
- // gdb can't break at the start of the function without it. Will remove if
- // this turns out to be a gdb bug.
- //
- bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None);
- if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF))
+ if (!hasFP(MF))
return;
// Check for overflow.
@@ -469,92 +497,11 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const {
.addReg(SP)
.addImm(-int64_t(MaxAlign));
}
-
- if (needsFrameMoves) {
- std::vector<MCCFIInstruction> Instructions = MMI.getFrameInstructions();
- MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
-
- // Advance CFA. DW_CFA_def_cfa
- unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
- unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
-
- // CFA = FP + 8
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
- FrameLabel, DwFPReg, -8));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // R31 (return addr) = CFA - #4
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- FrameLabel, DwRAReg, -4));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // R30 (frame ptr) = CFA - #8)
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- FrameLabel, DwFPReg, -8));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- unsigned int regsToMove[] = {
- Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
- Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
- Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
- Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
- Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10,
- Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister
- };
-
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-
- for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) {
- for (unsigned I = 0, E = CSI.size(); I < E; ++I) {
- if (CSI[I].getReg() == regsToMove[i]) {
- // Subtract 8 to make room for R30 and R31, which are added above.
- int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8;
-
- if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) {
- unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel,
- DwarfReg, Offset));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- } else {
- // Split the double regs into subregs, and generate appropriate
- // cfi_offsets.
- // The only reason, we are split double regs is, llvm-mc does not
- // understand paired registers for cfi_offset.
- // Eg .cfi_offset r1:0, -64
- unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI);
- unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false);
- unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
- unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
- unsigned HiCFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel,
- HiDwarfReg, Offset+4));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(HiCFIIndex);
- unsigned LoCFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(FrameLabel,
- LoDwarfReg, Offset));
- BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(LoCFIIndex);
- }
- break;
- }
- } // for CSI.size()
- } // for regsToMove
- } // needsFrameMoves
}
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
- //
- // Only insert deallocframe if we need to. Also at -O0. See comment
- // in insertPrologueInBlock above.
- //
- if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None)
+ if (!hasFP(MF))
return;
auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
@@ -630,12 +577,172 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
}
+namespace {
+ bool IsAllocFrame(MachineBasicBlock::const_iterator It) {
+ if (!It->isBundle())
+ return It->getOpcode() == Hexagon::S2_allocframe;
+ auto End = It->getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I = It.getInstrIterator();
+ while (++I != End && I->isBundled())
+ if (I->getOpcode() == Hexagon::S2_allocframe)
+ return true;
+ return false;
+ }
+
+ MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) {
+ for (auto &I : B)
+ if (IsAllocFrame(I))
+ return I;
+ return B.end();
+ }
+}
+
+
+void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
+ for (auto &B : MF) {
+ auto AF = FindAllocFrame(B);
+ if (AF == B.end())
+ continue;
+ insertCFIInstructionsAt(B, ++AF);
+ }
+}
+
+
+void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator At) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
+ auto &HRI = *HST.getRegisterInfo();
+
+ // If CFI instructions have debug information attached, something goes
+ // wrong with the final assembly generation: the prolog_end is placed
+ // in the wrong location.
+ DebugLoc DL;
+ const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
+
+ MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
+
+ if (hasFP(MF)) {
+ unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
+ unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
+
+ // Define CFA via an offset from the value of FP.
+ //
+ // -8 -4 0 (SP)
+ // --+----+----+---------------------
+ // | FP | LR | increasing addresses -->
+ // --+----+----+---------------------
+ // | +-- Old SP (before allocframe)
+ // +-- New FP (after allocframe)
+ //
+ // MCCFIInstruction::createDefCfa subtracts the offset from the register.
+ // MCCFIInstruction::createOffset takes the offset without sign change.
+ auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(DefCfa));
+ // R31 (return addr) = CFA - 4
+ auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffR31));
+ // R30 (frame ptr) = CFA - 8
+ auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffR30));
+ }
+
+ static unsigned int RegsToMove[] = {
+ Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
+ Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
+ Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
+ Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
+ Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9,
+ Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,
+ Hexagon::NoRegister
+ };
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {
+ unsigned Reg = RegsToMove[i];
+ auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
+ return C.getReg() == Reg;
+ };
+ auto F = std::find_if(CSI.begin(), CSI.end(), IfR);
+ if (F == CSI.end())
+ continue;
+
+ // Subtract 8 to make room for R30 and R31, which are added above.
+ unsigned FrameReg;
+ int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8;
+
+ if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
+ unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
+ auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
+ Offset);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffReg));
+ } else {
+ // Split the double regs into subregs, and generate appropriate
+ // cfi_offsets.
+ // The only reason we split double regs is that llvm-mc does not
+ // understand paired registers for cfi_offset.
+ // E.g. .cfi_offset r1:0, -64
+
+ unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
+ unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
+ unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
+ unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
+ auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
+ Offset+4);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffHi));
+ auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
+ Offset);
+ BuildMI(MBB, At, DL, CFID)
+ .addCFIIndex(MMI.addFrameInst(OffLo));
+ }
+ }
+}
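
Given the frame layout in the comment above (allocframe stores FP/LR just below the incoming SP, and the new FP points at that pair), the CFI directives amount to simple address arithmetic. A small sketch of that arithmetic with a made-up FP value, not part of the patch:

#include <cstdio>

int main() {
  // Hypothetical register values after allocframe.
  unsigned FP     = 0x1000;    // new frame pointer (points at saved FP/LR)
  unsigned CFA    = FP + 8;    // .cfi_def_cfa r30, 8 (createDefCfa(..., -8))
  unsigned LRslot = CFA - 4;   // .cfi_offset r31, -4 -> saved return address
  unsigned FPslot = CFA - 8;   // .cfi_offset r30, -8 -> saved old FP
  std::printf("CFA=%#x LR@%#x FP@%#x\n", CFA, LRslot, FPslot);
}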
+
+
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const HexagonMachineFunctionInfo *FuncInfo =
- MF.getInfo<HexagonMachineFunctionInfo>();
- return MFI->hasCalls() || MFI->getStackSize() > 0 ||
- FuncInfo->hasClobberLR();
+ auto &MFI = *MF.getFrameInfo();
+ auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+ bool HasFixed = MFI.getNumFixedObjects();
+ bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
+ .getLocalFrameObjectCount();
+ bool HasExtraAlign = HRI.needsStackRealignment(MF);
+ bool HasAlloca = MFI.hasVarSizedObjects();
+
+ // Insert ALLOCFRAME if we need to, or at -O0 for the debugger. We think
+ // this shouldn't be required, but we do it for now because gcc does and
+ // gdb can't break at the start of the function without it. We will remove
+ // it if this turns out to be a gdb bug.
+ //
+ if (MF.getTarget().getOptLevel() == CodeGenOpt::None)
+ return true;
+
+ // By default we want to use SP (since it's always there). FP requires
+ // some setup (i.e. ALLOCFRAME).
+ // Fixed and preallocated objects need FP if the distance from them to
+ // the SP is unknown (as is with alloca or aligna).
+ if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
+ return true;
+
+ if (MFI.getStackSize() > 0) {
+ if (UseAllocframe)
+ return true;
+ }
+
+ if (MFI.hasCalls() ||
+ MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
+ return true;
+
+ return false;
}
@@ -718,9 +825,89 @@ static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst,
}
-int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- return MF.getFrameInfo()->getObjectOffset(FI);
+int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ auto &MFI = *MF.getFrameInfo();
+ auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+
+ // Large parts of this code are shared with HRI::eliminateFrameIndex.
+ int Offset = MFI.getObjectOffset(FI);
+ bool HasAlloca = MFI.hasVarSizedObjects();
+ bool HasExtraAlign = HRI.needsStackRealignment(MF);
+ bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
+
+ unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
+ unsigned AP = 0;
+ if (const MachineInstr *AI = getAlignaInstr(MF))
+ AP = AI->getOperand(0).getReg();
+ unsigned FrameSize = MFI.getStackSize();
+
+ bool UseFP = false, UseAP = false; // Default: use SP (except at -O0).
+ // Use FP at -O0, except when there are objects with extra alignment.
+ // That additional alignment requirement may cause a pad to be inserted,
+ // which will make it impossible to use FP to access objects located
+ // past the pad.
+ if (NoOpt && !HasExtraAlign)
+ UseFP = true;
+ if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
+ // Fixed and preallocated objects will be located before any padding
+ // so FP must be used to access them.
+ UseFP |= (HasAlloca || HasExtraAlign);
+ } else {
+ if (HasAlloca) {
+ if (HasExtraAlign)
+ UseAP = true;
+ else
+ UseFP = true;
+ }
+ }
+
+ // If FP was picked, then there had better be FP.
+ bool HasFP = hasFP(MF);
+ assert((HasFP || !UseFP) && "This function must have frame pointer");
+
+ // Having FP implies allocframe. Allocframe will store extra 8 bytes:
+ // FP/LR. If the base register is used to access an object across these
+ // 8 bytes, then the offset will need to be adjusted by 8.
+ //
+ // After allocframe:
+ // HexagonISelLowering adds 8 to ---+
+ // the offsets of all stack-based |
+ // arguments (*) |
+ // |
+ // getObjectOffset < 0 0 8 getObjectOffset >= 8
+ // ------------------------+-----+------------------------> increasing
+ // <local objects> |FP/LR| <input arguments> addresses
+ // -----------------+------+-----+------------------------>
+ // | |
+ // SP/AP point --+ +-- FP points here (**)
+ // somewhere on
+ // this side of FP/LR
+ //
+ // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
+ // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
+
+ // The lowering assumes that FP/LR is present, and so the offsets of
+ // the formal arguments start at 8. If FP/LR is not there we need to
+ // reduce the offset by 8.
+ if (Offset > 0 && !HasFP)
+ Offset -= 8;
+
+ if (UseFP)
+ FrameReg = FP;
+ else if (UseAP)
+ FrameReg = AP;
+ else
+ FrameReg = SP;
+
+ // Calculate the actual offset in the instruction. If there is no FP
+ // (in other words, no allocframe), then SP will not be adjusted (i.e.
+ // there will be no SP -= FrameSize), so the frame size should not be
+ // added to the calculated offset.
+ int RealOffset = Offset;
+ if (!UseFP && !UseAP && HasFP)
+ RealOffset = FrameSize+Offset;
+ return RealOffset;
}
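
The offset returned by getFrameIndexReference depends on which base register was chosen: relative to FP the raw object offset is used, while relative to SP the frame size is added back when an allocframe adjusted SP. A numeric walkthrough with invented values, mirroring only the final arithmetic:

#include <cstdio>

// Illustrative only: pick the final offset for a frame object, given made-up
// inputs. Not LLVM code, just the tail of the calculation.
static int realOffset(int ObjOffset, unsigned FrameSize,
                      bool UseFP, bool UseAP, bool HasFP) {
  if (!UseFP && !UseAP && HasFP)        // SP-relative, but SP was moved by
    return int(FrameSize) + ObjOffset;  // allocframe: add the frame size back
  return ObjOffset;                     // FP/AP-relative: use the raw offset
}

int main() {
  // A local at getObjectOffset() == -16 in a 32-byte frame:
  std::printf("via FP: %d\n", realOffset(-16, 32, true,  false, true)); // -16
  std::printf("via SP: %d\n", realOffset(-16, 32, false, false, true)); //  16
}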
@@ -731,7 +918,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI = MBB.begin();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
if (useSpillFunction(MF, CSI)) {
unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI);
@@ -739,7 +926,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
// Call spill function.
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
MachineInstr *SaveRegsCall =
- BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
+ BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4))
.addExternalSymbol(SpillFun);
// Add callee-saved registers as use.
addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false);
@@ -757,7 +944,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
int FI = CSI[i].getFrameIdx();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
+ HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
if (IsKill)
MBB.addLiveIn(Reg);
}
@@ -772,7 +959,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
if (useRestoreFunction(MF, CSI)) {
bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
@@ -787,14 +974,14 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
if (HasTC) {
unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
- DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc))
+ DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
.addExternalSymbol(RestoreFn);
} else {
// The block has a return.
MachineBasicBlock::iterator It = MBB.getFirstTerminator();
assert(It->isReturn() && std::next(It) == MBB.end());
unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
- DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc))
+ DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
.addExternalSymbol(RestoreFn);
// Transfer the function live-out registers.
DeallocCall->copyImplicitOps(MF, It);
@@ -807,7 +994,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
int FI = CSI[i].getFrameIdx();
- TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
+ HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
}
return true;
}
@@ -832,9 +1019,9 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
// via AP, which may not be available at the particular place in the program.
MachineFrameInfo *MFI = MF.getFrameInfo();
bool HasAlloca = MFI->hasVarSizedObjects();
- bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment());
+ bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment());
- if (!HasAlloca || !HasAligna)
+ if (!HasAlloca || !NeedsAlign)
return;
unsigned LFS = MFI->getLocalFrameSize();
@@ -864,13 +1051,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
// Check for an unused caller-saved register.
for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
MCPhysReg FreeReg = *CallerSavedRegs;
- if (MRI.isPhysRegUsed(FreeReg))
+ if (!MRI.reg_nodbg_empty(FreeReg))
continue;
// Check aliased register usage.
bool IsCurrentRegUsed = false;
for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
- if (MRI.isPhysRegUsed(*AI)) {
+ if (!MRI.reg_nodbg_empty(*AI)) {
IsCurrentRegUsed = true;
break;
}
@@ -896,7 +1083,7 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF)
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block.
MachineBasicBlock::iterator NextII;
for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
@@ -1210,7 +1397,8 @@ bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
}
-MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const {
+const MachineInstr *HexagonFrameLowering::getAlignaInstr(
+ const MachineFunction &MF) const {
for (auto &B : MF)
for (auto &I : B)
if (I.getOpcode() == Hexagon::ALIGNA)
@@ -1219,6 +1407,7 @@ MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const {
}
+// FIXME: Use Function::optForSize().
inline static bool isOptSize(const MachineFunction &MF) {
AttributeSet AF = MF.getFunction()->getAttributes();
return AF.hasAttribute(AttributeSet::FunctionIndex,
@@ -1226,8 +1415,7 @@ inline static bool isOptSize(const MachineFunction &MF) {
}
inline static bool isMinSize(const MachineFunction &MF) {
- AttributeSet AF = MF.getFunction()->getAttributes();
- return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ return MF.getFunction()->optForMinSize();
}
@@ -1289,4 +1477,3 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
: SpillFuncThreshold;
return Threshold < NumCSI;
}
-
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index d39ee2c..683b303 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -51,7 +51,8 @@ public:
bool targetHandlesStackFrameRounding() const override {
return true;
}
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
bool hasFP(const MachineFunction &MF) const override;
const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries)
@@ -73,7 +74,9 @@ public:
const override;
bool needsAligna(const MachineFunction &MF) const;
- MachineInstr *getAlignaInstr(MachineFunction &MF) const;
+ const MachineInstr *getAlignaInstr(const MachineFunction &MF) const;
+
+ void insertCFIInstructions(MachineFunction &MF) const;
private:
typedef std::vector<CalleeSavedInfo> CSIVect;
@@ -86,6 +89,8 @@ private:
const HexagonRegisterInfo &HRI) const;
bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
const HexagonRegisterInfo &HRI) const;
+ void insertCFIInstructionsAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator At) const;
void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const;
bool replacePredRegPseudoSpillCode(MachineFunction &MF) const;
@@ -94,7 +99,7 @@ private:
void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB,
MachineBasicBlock *&EpilogB) const;
- bool shouldInlineCSR(llvm::MachineFunction&, const CSIVect&) const;
+ bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const;
bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
index 4d32208..f26e2ff 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -195,7 +195,7 @@ bool HexagonGenExtract::convert(Instruction *In) {
return false;
}
- IRBuilder<> IRB(BB, In);
+ IRBuilder<> IRB(In);
Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
: Intrinsic::hexagon_S2_extractup;
Module *Mod = BB->getParent()->getParent();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 096da94..64a2b6c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -77,9 +77,8 @@ namespace {
namespace {
// Set of virtual registers, based on BitVector.
struct RegisterSet : private BitVector {
- RegisterSet() : BitVector() {}
+ RegisterSet() = default;
explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
- RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
using BitVector::clear;
@@ -1496,7 +1495,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
// version of DCE that preserves lifetime markers. Without it, merging
// of stack objects can fail to recognize and merge disjoint objects
// leading to unnecessary stack growth.
- Changed |= removeDeadCode(MDT->getRootNode());
+ Changed = removeDeadCode(MDT->getRootNode());
const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
BitTracker BTLoc(HE, MF);
@@ -1534,7 +1533,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
}
if (IFMap.empty())
- return false;
+ return Changed;
{
NamedRegionTimer _T("pruning", "hexinsert", TimingDetail);
@@ -1547,7 +1546,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
}
if (IFMap.empty())
- return false;
+ return Changed;
{
NamedRegionTimer _T("selection", "hexinsert", TimingDetail);
@@ -1572,13 +1571,15 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, n = Out.size(); i < n; ++i)
IFMap.erase(Out[i]);
}
+ if (IFMap.empty())
+ return Changed;
{
NamedRegionTimer _T("generation", "hexinsert", TimingDetail);
- Changed = generateInserts();
+ generateInserts();
}
- return Changed;
+ return true;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
new file mode 100644
index 0000000..c059d56
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -0,0 +1,319 @@
+//===--- HexagonGenMux.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// During instruction selection, MUX instructions are generated for
+// conditional assignments. Since such assignments often present an
+// opportunity to predicate instructions, HexagonExpandCondsets
+// expands MUXes into pairs of conditional transfers, and then proceeds
+// with predication of the producers/consumers of the registers involved.
+// This happens after exiting from the SSA form, but before the machine
+// instruction scheduler. After scheduling and register allocation there
+// can be pairs of conditional transfers resulting from a MUX where neither
+// of them was further predicated. If such a pair now ends up too far from
+// the instruction defining the predicate register, the transfers cannot use
+// the .new form. In such cases it
+// is better to collapse them back to a single MUX instruction.
+
+#define DEBUG_TYPE "hexmux"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonGenMux();
+ void initializeHexagonGenMuxPass(PassRegistry& Registry);
+}
+
+namespace {
+ class HexagonGenMux : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) {
+ initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon generate mux instructions";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+ struct CondsetInfo {
+ unsigned PredR;
+ unsigned TrueX, FalseX;
+ CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {}
+ };
+ struct DefUseInfo {
+ BitVector Defs, Uses;
+ DefUseInfo() : Defs(), Uses() {}
+ DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {}
+ };
+ struct MuxInfo {
+ MachineBasicBlock::iterator At;
+ unsigned DefR, PredR;
+ MachineOperand *SrcT, *SrcF;
+ MachineInstr *Def1, *Def2;
+ MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR,
+ MachineOperand *TOp, MachineOperand *FOp,
+ MachineInstr *D1, MachineInstr *D2)
+ : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1),
+ Def2(D2) {}
+ };
+ typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap;
+ typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap;
+ typedef SmallVector<MuxInfo,4> MuxInfoList;
+
+ bool isRegPair(unsigned Reg) const {
+ return Hexagon::DoubleRegsRegClass.contains(Reg);
+ }
+ void getSubRegs(unsigned Reg, BitVector &SRs) const;
+ void expandReg(unsigned Reg, BitVector &Set) const;
+ void getDefsUses(const MachineInstr *MI, BitVector &Defs,
+ BitVector &Uses) const;
+ void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
+ DefUseInfoMap &DUM);
+ bool isCondTransfer(unsigned Opc) const;
+ unsigned getMuxOpcode(const MachineOperand &Src1,
+ const MachineOperand &Src2) const;
+ bool genMuxInBlock(MachineBasicBlock &B);
+ };
+
+ char HexagonGenMux::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonGenMux, "hexagon-mux",
+ "Hexagon generate mux instructions", false, false)
+
+
+void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const {
+ for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I)
+ SRs[*I] = true;
+}
+
+
+void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const {
+ if (isRegPair(Reg))
+ getSubRegs(Reg, Set);
+ else
+ Set[Reg] = true;
+}
+
+
+void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs,
+ BitVector &Uses) const {
+ // First, get the implicit defs and uses for this instruction.
+ unsigned Opc = MI->getOpcode();
+ const MCInstrDesc &D = HII->get(Opc);
+ if (const MCPhysReg *R = D.ImplicitDefs)
+ while (*R)
+ expandReg(*R++, Defs);
+ if (const MCPhysReg *R = D.ImplicitUses)
+ while (*R)
+ expandReg(*R++, Uses);
+
+ // Look over all operands, and collect explicit defs and uses.
+ for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) {
+ if (!Mo->isReg() || Mo->isImplicit())
+ continue;
+ unsigned R = Mo->getReg();
+ BitVector &Set = Mo->isDef() ? Defs : Uses;
+ expandReg(R, Set);
+ }
+}
+
+
+void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
+ DefUseInfoMap &DUM) {
+ unsigned Index = 0;
+ unsigned NR = HRI->getNumRegs();
+ BitVector Defs(NR), Uses(NR);
+
+ for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ I2X.insert(std::make_pair(MI, Index));
+ Defs.reset();
+ Uses.reset();
+ getDefsUses(MI, Defs, Uses);
+ DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses)));
+ Index++;
+ }
+}
+
+
+bool HexagonGenMux::isCondTransfer(unsigned Opc) const {
+ switch (Opc) {
+ case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrf:
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmoveif:
+ return true;
+ }
+ return false;
+}
+
+
+unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1,
+ const MachineOperand &Src2) const {
+ bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg();
+ if (IsReg1)
+ return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir;
+ if (IsReg2)
+ return Hexagon::C2_muxri;
+
+ // Neither is a register. The first source is extendable, but the second
+ // is not (s8).
+ if (Src2.isImm() && isInt<8>(Src2.getImm()))
+ return Hexagon::C2_muxii;
+
+ return 0;
+}
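
The opcode choice above depends only on which mux operands are registers, with the immediate/immediate form additionally requiring the second constant to fit a signed 8-bit field (it is not extendable). A compact sketch of that dispatch using strings instead of real opcode enums, illustrative only:

#include <cstdio>
#include <cstdint>

// Returns the mux flavour by operand kinds, mirroring getMuxOpcode().
// Reg1/Reg2 say whether the sources are registers; Imm2 is the second
// source when it is a constant, and the imm/imm form needs it to fit s8.
static const char *muxFlavour(bool Reg1, bool Reg2, int64_t Imm2) {
  if (Reg1)
    return Reg2 ? "C2_mux" : "C2_muxir";
  if (Reg2)
    return "C2_muxri";
  if (Imm2 >= -128 && Imm2 <= 127)   // the isInt<8>(Imm2) check
    return "C2_muxii";
  return "<none>";
}

int main() {
  std::printf("%s\n", muxFlavour(true, true, 0));     // C2_mux
  std::printf("%s\n", muxFlavour(false, false, 200)); // <none>: 200 exceeds s8
}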
+
+
+bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
+ bool Changed = false;
+ InstrIndexMap I2X;
+ DefUseInfoMap DUM;
+ buildMaps(B, I2X, DUM);
+
+ typedef DenseMap<unsigned,CondsetInfo> CondsetMap;
+ CondsetMap CM;
+ MuxInfoList ML;
+
+ MachineBasicBlock::iterator NextI, End = B.end();
+ for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) {
+ MachineInstr *MI = &*I;
+ NextI = std::next(I);
+ unsigned Opc = MI->getOpcode();
+ if (!isCondTransfer(Opc))
+ continue;
+ unsigned DR = MI->getOperand(0).getReg();
+ if (isRegPair(DR))
+ continue;
+
+ unsigned PR = MI->getOperand(1).getReg();
+ unsigned Idx = I2X.lookup(MI);
+ CondsetMap::iterator F = CM.find(DR);
+ bool IfTrue = HII->isPredicatedTrue(Opc);
+
+ // If there is no record of a conditional transfer for this register,
+ // or the predicate register differs, create a new record for it.
+ if (F != CM.end() && F->second.PredR != PR) {
+ CM.erase(F);
+ F = CM.end();
+ }
+ if (F == CM.end()) {
+ auto It = CM.insert(std::make_pair(DR, CondsetInfo()));
+ F = It.first;
+ F->second.PredR = PR;
+ }
+ CondsetInfo &CI = F->second;
+ if (IfTrue)
+ CI.TrueX = Idx;
+ else
+ CI.FalseX = Idx;
+ if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX)
+ continue;
+
+ // There is now a complete definition of DR, i.e. we have the predicate
+ // register, the definition if-true, and definition if-false.
+
+ // First, check if both definitions are far enough from the definition
+ // of the predicate register.
+ unsigned MinX = std::min(CI.TrueX, CI.FalseX);
+ unsigned MaxX = std::max(CI.TrueX, CI.FalseX);
+ unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0;
+ bool NearDef = false;
+ for (unsigned X = SearchX; X < MaxX; ++X) {
+ const DefUseInfo &DU = DUM.lookup(X);
+ if (!DU.Defs[PR])
+ continue;
+ NearDef = true;
+ break;
+ }
+ if (NearDef)
+ continue;
+
+ // The predicate register is not defined in the last few instructions.
+ // Check if the conversion to MUX is possible (either "up", i.e. at the
+ // place of the earlier partial definition, or "down", where the later
+ // definition is located). Examine all defs and uses between these two
+ // definitions.
+ // SR1, SR2 - source registers from the first and the second definition.
+ MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin();
+ std::advance(It1, MinX);
+ std::advance(It2, MaxX);
+ MachineInstr *Def1 = It1, *Def2 = It2;
+ MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2);
+ unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
+ unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
+ bool Failure = false, CanUp = true, CanDown = true;
+ for (unsigned X = MinX+1; X < MaxX; X++) {
+ const DefUseInfo &DU = DUM.lookup(X);
+ if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
+ Failure = true;
+ break;
+ }
+ if (CanDown && DU.Defs[SR1])
+ CanDown = false;
+ if (CanUp && DU.Defs[SR2])
+ CanUp = false;
+ }
+ if (Failure || (!CanUp && !CanDown))
+ continue;
+
+ MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2;
+ MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2;
+ // Prefer "down", since this will move the MUX farther away from the
+ // predicate definition.
+ MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
+ ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
+ }
+
+ for (unsigned I = 0, N = ML.size(); I < N; ++I) {
+ MuxInfo &MX = ML[I];
+ MachineBasicBlock &B = *MX.At->getParent();
+ DebugLoc DL = MX.At->getDebugLoc();
+ unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
+ if (!MxOpc)
+ continue;
+ BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
+ .addReg(MX.PredR)
+ .addOperand(*MX.SrcT)
+ .addOperand(*MX.SrcF);
+ B.erase(MX.Def1);
+ B.erase(MX.Def2);
+ Changed = true;
+ }
+
+ return Changed;
+}
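
The block scan in genMuxInBlock is the inverse of what HexagonExpandCondsets did earlier: when a pair of conditional transfers ends up too far from the predicate definition to use the .new form, it is folded back into one mux. A rough C-level picture of that rewrite, not part of the patch:

#include <cstdio>

// Before: the expanded pair of conditional transfers (A2_tfrt / A2_tfrf).
static int expandedPair(bool p, int a, int b) {
  int x = 0;
  if (p)  x = a;     // if (p)  r0 = a
  if (!p) x = b;     // if (!p) r0 = b
  return x;
}

// After genMuxInBlock: a single select, preferably placed at the later
// definition so it sits as far from the predicate definition as possible.
static int collapsed(bool p, int a, int b) {
  return p ? a : b;  // r0 = mux(p, a, b)
}

int main() {
  std::printf("%d %d\n", expandedPair(true, 1, 2), collapsed(true, 1, 2));
  std::printf("%d %d\n", expandedPair(false, 1, 2), collapsed(false, 1, 2));
}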
+
+bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) {
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ bool Changed = false;
+ for (auto &I : MF)
+ Changed |= genMuxInBlock(I);
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonGenMux() {
+ return new HexagonGenMux();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 6905c4f..d9675b5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -250,7 +250,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
unsigned NewPR = MRI->createVirtualRegister(PredRC);
// For convertible instructions, do not modify them, so that they can
- // be coverted later. Generate a copy from Reg to NewPR.
+ // be converted later. Generate a copy from Reg to NewPR.
if (isConvertibleToPredForm(DefI)) {
MachineBasicBlock::iterator DefIt = DefI;
BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 53b6bf6..d20a809 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -727,9 +727,9 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// Phis that may feed into the loop.
LoopFeederMap LoopFeederPhi;
- // Check if the inital value may be zero and can be decremented in the first
+ // Check if the initial value may be zero and can be decremented in the first
// iteration. If the value is zero, the endloop instruction will not decrement
- // the loop counter, so we shoudn't generate a hardware loop in this case.
+ // the loop counter, so we shouldn't generate a hardware loop in this case.
if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
LoopFeederPhi))
return nullptr;
@@ -1288,14 +1288,14 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
typedef MachineBasicBlock::instr_iterator instr_iterator;
// Check if things are in order to begin with.
- for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+ for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
if (&*I == CmpI)
return true;
// Out of order.
unsigned PredR = CmpI->getOperand(0).getReg();
bool FoundBump = false;
- instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt);
+ instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
MachineInstr *In = &*I;
for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
@@ -1307,9 +1307,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
}
if (In == BumpI) {
- instr_iterator After = BumpI;
- instr_iterator From = CmpI;
- BB->splice(std::next(After), BB, From);
+ BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
FoundBump = true;
break;
}
@@ -1440,7 +1438,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
if (Comparison::isSigned(Cmp))
return false;
- // Check if there is a comparison of the inital value. If the initial value
+ // Check if there is a comparison of the initial value. If the initial value
// is greater than or not equal to another value, then assume this is a
// range check.
if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
@@ -1850,7 +1848,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
}
MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
- MF->insert(Header, NewPH);
+ MF->insert(Header->getIterator(), NewPH);
if (Header->pred_size() > 2) {
// Ensure that the header has only two predecessors: the preheader and
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9123057..a0da945 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -50,16 +50,21 @@ namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
const HexagonTargetMachine& HTM;
const HexagonSubtarget *HST;
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
public:
explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), HTM(tm) {
+ : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+ HRI(nullptr) {
initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
HST = &MF.getSubtarget<HexagonSubtarget>();
+ HII = HST->getInstrInfo();
+ HRI = HST->getRegisterInfo();
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -104,7 +109,6 @@ public:
SDNode *SelectConstantFP(SDNode *N);
SDNode *SelectAdd(SDNode *N);
SDNode *SelectBitOp(SDNode *N);
- bool isConstExtProfitable(SDNode *N) const;
// XformMskToBitPosU5Imm - Returns the bit position which
// the single bit 32 bit mask represents.
@@ -139,8 +143,8 @@ public:
// type i32 where the negative literal is transformed into a positive literal
// for use in -= memops.
inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
- assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant( - Imm, DL, MVT::i32);
+ assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+ return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
}
// XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
@@ -203,11 +207,10 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
// Intrinsics that return a predicate.
-static unsigned doesIntrinsicReturnPredicate(unsigned ID)
-{
+static bool doesIntrinsicReturnPredicate(unsigned ID) {
switch (ID) {
default:
- return 0;
+ return false;
case Intrinsic::hexagon_C2_cmpeq:
case Intrinsic::hexagon_C2_cmpgt:
case Intrinsic::hexagon_C2_cmpgtu:
@@ -244,7 +247,7 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
case Intrinsic::hexagon_C2_tfrrp:
case Intrinsic::hexagon_S2_tstbit_i:
case Intrinsic::hexagon_S2_tstbit_r:
- return 1;
+ return true;
}
}
@@ -258,8 +261,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
MVT::Other, Base, TargetConst,
@@ -312,8 +314,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
SDNode *OffsetNode = Offset.getNode();
int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
@@ -378,29 +379,46 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
// loads.
ISD::LoadExtType ExtType = LD->getExtensionType();
bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
+ bool HasVecOffset = false;
// Figure out the opcode.
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
if (LoadedVT == MVT::i64) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadrd_pi;
else
Opcode = Hexagon::L2_loadrd_io;
} else if (LoadedVT == MVT::i32) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = Hexagon::L2_loadri_pi;
else
Opcode = Hexagon::L2_loadri_io;
} else if (LoadedVT == MVT::i16) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
else
Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
} else if (LoadedVT == MVT::i8) {
- if (TII.isValidAutoIncImm(LoadedVT, Val))
+ if (HII->isValidAutoIncImm(LoadedVT, Val))
Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
else
Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+ } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 ||
+ LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) {
+ HasVecOffset = true;
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+ Opcode = Hexagon::V6_vL32b_pi;
+ }
+ else
+ Opcode = Hexagon::V6_vL32b_ai;
+ // 128B
+ } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 ||
+ LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) {
+ HasVecOffset = true;
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+ Opcode = Hexagon::V6_vL32b_pi_128B;
+ }
+ else
+ Opcode = Hexagon::V6_vL32b_ai_128B;
} else
llvm_unreachable("unknown memory type");
@@ -411,7 +429,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
- if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+ if (HII->isValidAutoIncImm(LoadedVT, Val)) {
SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
LD->getValueType(0),
@@ -420,15 +438,25 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = LD->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- const SDValue Froms[] = { SDValue(LD, 0),
- SDValue(LD, 1),
- SDValue(LD, 2)
- };
- const SDValue Tos[] = { SDValue(Result, 0),
- SDValue(Result, 1),
- SDValue(Result, 2)
- };
- ReplaceUses(Froms, Tos, 3);
+ if (HasVecOffset) {
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 2);
+ } else {
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ }
return Result;
} else {
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
@@ -487,8 +515,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
// Offset value must be within representable range
// and must have correct alignment properties.
- const HexagonInstrInfo &TII = *HST->getInstrInfo();
- if (TII.isValidAutoIncImm(StoredVT, Val)) {
+ if (HII->isValidAutoIncImm(StoredVT, Val)) {
unsigned Opcode = 0;
// Figure out the post inc version of opcode.
@@ -496,7 +523,15 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
- else llvm_unreachable("unknown memory type");
+ else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+ StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+ Opcode = Hexagon::V6_vS32b_pi;
+ }
+ // 128B
+ else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+ StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+ Opcode = Hexagon::V6_vS32b_pi_128B;
+ } else llvm_unreachable("unknown memory type");
if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
@@ -530,6 +565,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
+ else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+ StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
+ Opcode = Hexagon::V6_vS32b_ai;
+ // 128B
+ else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+ StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
+ Opcode = Hexagon::V6_vS32b_ai_128B;
else llvm_unreachable("unknown memory type");
// Build regular store.
@@ -1113,14 +1155,12 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
}
if (Opc == ISD::AND) {
- if (((ValueVT == MVT::i32) &&
- (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
- ((ValueVT == MVT::i64) &&
- (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
- // If it's simple AND, do the normal op.
- return SelectCode(N);
- else
+ // Check if this is a bit-clearing AND, if not select code the usual way.
+ if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
+ (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
Val = ~Val;
+ else
+ return SelectCode(N);
}
// If OR or AND is being fed by shl, srl and, sra don't do this change,
@@ -1128,7 +1168,8 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
// Traverse the DAG to see if there is shl, srl and sra.
if (Opc == ISD::OR || Opc == ISD::AND) {
switch (N->getOperand(0)->getOpcode()) {
- default: break;
+ default:
+ break;
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -1137,23 +1178,24 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
}
// Make sure it's power of 2.
- unsigned bitpos = 0;
+ unsigned BitPos = 0;
if (Opc != ISD::FABS && Opc != ISD::FNEG) {
- if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
- ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+ if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
+ (ValueVT == MVT::i64 && !isPowerOf2_64(Val)))
return SelectCode(N);
// Get the bit position.
- bitpos = countTrailingZeros(uint64_t(Val));
+ BitPos = countTrailingZeros(uint64_t(Val));
} else {
// For fabs and fneg, it's always the 31st bit.
- bitpos = 31;
+ BitPos = 31;
}
unsigned BitOpc = 0;
// Set the right opcode for bitwise operations.
- switch(Opc) {
- default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+ switch (Opc) {
+ default:
+ llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
case ISD::AND:
case ISD::FABS:
BitOpc = Hexagon::S2_clrbit_i;
@@ -1169,7 +1211,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
SDNode *Result;
// Get the right SDVal for the opcode.
- SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32);
+ SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32);
if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
@@ -1198,7 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
MVT::i32, SDValue(Reg, 0));
// Clear/set/toggle hi or lo registers depending on the bit position.
- if (SubValueVT != MVT::f32 && bitpos < 32) {
+ if (SubValueVT != MVT::f32 && BitPos < 32) {
SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
SubregLO, SDVal);
const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
@@ -1207,7 +1249,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
dl, ValueVT, Ops);
} else {
if (Opc != ISD::FABS && Opc != ISD::FNEG)
- SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32);
+ SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32);
SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
SubregHI, SDVal);
const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
@@ -1328,25 +1370,12 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
return false;
}
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
- unsigned UseCount = 0;
- unsigned CallCount = 0;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- // Ignore call instructions.
- if (I->getOpcode() == ISD::CopyToReg)
- ++CallCount;
- UseCount++;
- }
-
- return (UseCount <= 1) || (CallCount > 1);
-
-}
void HexagonDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG &DAG = *CurDAG;
std::vector<SDNode*> Nodes;
- for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
- Nodes.push_back(I);
+ for (SDNode &Node : DAG.allnodes())
+ Nodes.push_back(&Node);
// Simplify: (or (select c x 0) z) -> (select c (or x z) z)
// (or (select c 0 y) z) -> (select c z (or y z))
@@ -1397,11 +1426,10 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
return;
MachineFrameInfo *MFI = MF->getFrameInfo();
- MachineBasicBlock *EntryBB = MF->begin();
+ MachineBasicBlock *EntryBB = &MF->front();
unsigned AR = FuncInfo->CreateReg(MVT::i32);
unsigned MaxA = MFI->getMaxAlignment();
- auto &HII = *HST.getInstrInfo();
- BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR)
+ BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR)
.addImm(MaxA);
MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
}
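Much of the churn in this file replaces per-call subtarget lookups with the cached HII/HRI pointers, and extends post-increment opcode selection to the HVX vector types. The _pi forms are chosen only when the constant offset is a legal auto-increment immediate; below is a standalone sketch of that decision, with a hypothetical simplified predicate standing in for HexagonInstrInfo::isValidAutoIncImm (exact immediate field widths vary per instruction class):

    #include <cstdint>

    // Hypothetical stand-in for isValidAutoIncImm: the offset must be a
    // multiple of the access size and, once scaled, fit a small signed field.
    static bool isValidAutoIncImmSketch(unsigned AccessBytes, int32_t Off) {
      if (Off % (int32_t)AccessBytes != 0)
        return false;
      int32_t Scaled = Off / (int32_t)AccessBytes;
      return Scaled >= -8 && Scaled <= 7;   // s4-style field, as an example
    }

    enum class LoadForm { PostInc, BaseOffset };

    // Mirror of the selection pattern above: post-increment (_pi) when the
    // immediate is encodable, otherwise the base+offset (_io / _ai) form.
    static LoadForm pickLoadForm(unsigned AccessBytes, int32_t Off) {
      return isValidAutoIncImmSketch(AccessBytes, Off) ? LoadForm::PostInc
                                                       : LoadForm::BaseOffset;
    }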
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index c739afb..0167090 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
#define DEBUG_TYPE "hexagon-lowering"
-static cl::opt<bool>
-EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
+ cl::init(true), cl::Hidden,
cl::desc("Control jump table emission on Hexagon target"));
static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
@@ -98,6 +98,9 @@ public:
}
// Implement calling convention for Hexagon.
+
+static bool IsHvxVectorType(MVT ty);
+
static bool
CC_Hexagon(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -114,6 +117,11 @@ CC_Hexagon64(unsigned ValNo, MVT ValVT,
ISD::ArgFlagsTy ArgFlags, CCState &State);
static bool
+CC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
RetCC_Hexagon(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
@@ -129,6 +137,11 @@ RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
ISD::ArgFlagsTy ArgFlags, CCState &State);
static bool
+RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
@@ -169,15 +182,43 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
+ if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
+ LocVT == MVT::v16i8) {
+ ofst = State.AllocateStack(16, 16);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
+ LocVT == MVT::v32i8) {
+ ofst = State.AllocateStack(32, 32);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
+ ofst = State.AllocateStack(64, 64);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
+ ofst = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v256i8) {
+ ofst = State.AllocateStack(256, 256);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+
llvm_unreachable(nullptr);
}
-static bool
-CC_Hexagon (unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
if (ArgFlags.isByVal()) {
// Passed on stack.
unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
@@ -213,6 +254,17 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
return false;
}
+ if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (IsHvxVectorType(LocVT)) {
+ if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
return true; // CC didn't match.
}
@@ -260,10 +312,82 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
return false;
}
+static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1,
+ Hexagon::V2, Hexagon::V3,
+ Hexagon::V4, Hexagon::V5,
+ Hexagon::V6, Hexagon::V7,
+ Hexagon::V8, Hexagon::V9,
+ Hexagon::V10, Hexagon::V11,
+ Hexagon::V12, Hexagon::V13,
+ Hexagon::V14, Hexagon::V15};
+ static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1,
+ Hexagon::W2, Hexagon::W3,
+ Hexagon::W4, Hexagon::W5,
+ Hexagon::W6, Hexagon::W7};
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ if ((UseHVX && !UseHVXDbl) &&
+ (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
+ if (unsigned Reg = State.AllocateReg(VecLstS)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(64, 64);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ if ((UseHVX && !UseHVXDbl) &&
+ (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8)) {
+ if (unsigned Reg = State.AllocateReg(VecLstD)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ // 128B Mode
+ if ((UseHVX && UseHVXDbl) &&
+ (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v256i8)) {
+ if (unsigned Reg = State.AllocateReg(VecLstD)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(256, 256);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ if ((UseHVX && UseHVXDbl) &&
+ (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
+ if (unsigned Reg = State.AllocateReg(VecLstS)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ unsigned Offset = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+ return true;
+}
+
static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
if (LocVT == MVT::i1 ||
LocVT == MVT::i8 ||
@@ -282,8 +406,24 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
} else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
LocVT = MVT::i64;
LocInfo = CCValAssign::BCvt;
+ } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
+ LocVT == MVT::v16i32 || LocVT == MVT::v8i64 ||
+ LocVT == MVT::v512i1) {
+ LocVT = MVT::v16i32;
+ ValVT = MVT::v16i32;
+ LocInfo = CCValAssign::Full;
+ } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
+ LocVT == MVT::v32i32 || LocVT == MVT::v16i64 ||
+ (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) {
+ LocVT = MVT::v32i32;
+ ValVT = MVT::v32i32;
+ LocInfo = CCValAssign::Full;
+ } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
+ LocVT == MVT::v64i32 || LocVT == MVT::v32i64) {
+ LocVT = MVT::v64i32;
+ ValVT = MVT::v64i32;
+ LocInfo = CCValAssign::Full;
}
-
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
@@ -293,7 +433,10 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
}
-
+ if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
+ if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
return true; // CC didn't match.
}
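CC_HexagonVector and RetCC_HexagonVector both follow the usual custom calling-convention shape: try a fixed list of HVX registers, and fall back to a stack slot whose size and alignment match the vector. A trimmed sketch of that pattern, assuming the generated Hexagon register enum is in scope (the two-register list and the 64/64 size/alignment pair are examples only):

    #include "llvm/CodeGen/CallingConvLower.h"
    using namespace llvm;

    // Register-list-then-stack allocation, as used by the HVX handlers above.
    static bool allocHvxArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                            CCValAssign::LocInfo LocInfo, CCState &State) {
      static const MCPhysReg VecRegs[] = { Hexagon::V0, Hexagon::V1 };
      if (unsigned Reg = State.AllocateReg(VecRegs)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return false;                       // assigned to a register
      }
      unsigned Offset = State.AllocateStack(64, 64);  // size == alignment
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;                         // assigned to the stack
    }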
@@ -328,6 +471,52 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
return false;
}
+static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ auto &MF = State.getMachineFunction();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ unsigned OffSiz = 64;
+ if (LocVT == MVT::v16i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ } else if (LocVT == MVT::v32i32) {
+ unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0;
+ if (unsigned Reg = State.AllocateReg(Req)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ OffSiz = 128;
+ } else if (LocVT == MVT::v64i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ OffSiz = 256;
+ }
+
+ unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+ if (VT != PromotedLdStVT) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+ PromotedLdStVT.getSimpleVT());
+ }
+}
+
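promoteLdStType, relocated here by this patch, simply marks a vector type's loads and stores as Promote and records the scalar type they become. A hedged usage sketch; the pairings below illustrate how the constructor uses it for the small 32/64-bit vector types:

    // Example call sites in the HexagonTargetLowering constructor
    // (illustrative pairs): 32-bit vectors load/store as i32, 64-bit as i64.
    promoteLdStType(MVT::v4i8,  MVT::i32);
    promoteLdStType(MVT::v2i16, MVT::i32);
    promoteLdStType(MVT::v8i8,  MVT::i64);
    promoteLdStType(MVT::v4i16, MVT::i64);
    promoteLdStType(MVT::v2i32, MVT::i64);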
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
const {
@@ -351,6 +540,15 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
MachinePointerInfo(), MachinePointerInfo());
}
+static bool IsHvxVectorType(MVT ty) {
+ return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 ||
+ ty == MVT::v64i8 ||
+ ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 ||
+ ty == MVT::v128i8 ||
+ ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 ||
+ ty == MVT::v256i8 ||
+ ty == MVT::v512i1 || ty == MVT::v1024i1);
+}
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
@@ -463,19 +661,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Check for varargs.
int NumNamedVarArgParams = -1;
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
- {
- const Function* CalleeFn = nullptr;
- Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
- if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
- {
+ if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = GAN->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+ if (const Function* F = dyn_cast<Function>(GV)) {
// If a function has zero args and is a vararg function, that's
// disallowed so it must be an undeclared function. Do not assume
// varargs if the callee is undefined.
- if (CalleeFn->isVarArg() &&
- CalleeFn->getFunctionType()->getNumParams() != 0) {
- NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
- }
+ if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
+ NumNamedVarArgParams = F->getFunctionType()->getNumParams();
}
}
@@ -519,11 +713,16 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
+ bool NeedsArgAlign = false;
+ unsigned LargestAlignSeen = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Record if we need > 8 byte alignment on an argument.
+ bool ArgAlign = IsHvxVectorType(VA.getValVT());
+ NeedsArgAlign |= ArgAlign;
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -549,13 +748,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
StackPtr.getValueType());
MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+ if (ArgAlign)
+ LargestAlignSeen = std::max(LargestAlignSeen,
+ VA.getLocVT().getStoreSizeInBits() >> 3);
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
// is a pointer.
MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
- MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset);
+ MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+ DAG.getMachineFunction(), LocMemOffset);
SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
false, 0);
MemOpChains.push_back(S);
@@ -569,6 +772,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
}
+ if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+ DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+ // V6 vectors passed by value have 64- or 128-byte alignment, depending
+ // on whether we are in 64-byte or 128-byte vector mode.
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
+ assert(Subtarget.useHVXOps());
+ const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+ LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ MFI->ensureMaxAlignment(LargestAlignSeen);
+ }
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
@@ -613,12 +827,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- if (flag_aligned_memcpy) {
- const char *MemcpyName =
- "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
- Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT);
- flag_aligned_memcpy = false;
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
@@ -668,7 +877,19 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
if (Ptr->getOpcode() != ISD::ADD)
return false;
- if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
+ bool UseHVX = HST.useHVXOps();
+ bool UseHVXDbl = HST.useHVXDblOps();
+
+ bool ValidHVXDblType =
+ (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+ VT == MVT::v64i16 || VT == MVT::v128i8);
+ bool ValidHVXType =
+ UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+ VT == MVT::v32i16 || VT == MVT::v64i8);
+
+ if (ValidHVXDblType || ValidHVXType ||
+ VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
isInc = (Ptr->getOpcode() == ISD::ADD);
Base = Ptr->getOperand(0);
Offset = Ptr->getOperand(1);
@@ -679,23 +900,6 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
return false;
}
-// TODO: Put this function along with the other isS* functions in
-// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
-// functions defined in HexagonOperands.td.
-static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
- ConstantSDNode *N = cast<ConstantSDNode>(S);
-
- // immS4 predicate - True if the immediate fits in a 4-bit sign extended.
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- int64_t m = 0;
- if (ShiftAmount > 0) {
- m = v % ShiftAmount;
- v = v >> ShiftAmount;
- }
- return (v <= 7) && (v >= -8) && (m == 0);
-}
-
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
@@ -724,18 +928,20 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isInc = false;
bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
- // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
- int ShiftAmount = VT.getSizeInBits() / 16;
- if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
- AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
- return true;
+ if (isLegal) {
+ auto &HII = *Subtarget.getInstrInfo();
+ int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ if (HII.isValidAutoIncImm(VT, OffsetVal)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
}
return false;
}
-SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue
+HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
@@ -784,47 +990,6 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
return Op;
}
-
-//
-// Taken from the XCore backend.
-//
-SDValue HexagonTargetLowering::
-LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
-{
- SDValue Chain = Op.getOperand(0);
- SDValue Table = Op.getOperand(1);
- SDValue Index = Op.getOperand(2);
- SDLoc dl(Op);
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
- unsigned JTI = JT->getIndex();
- MachineFunction &MF = DAG.getMachineFunction();
- const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
- SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
-
- // Mark all jump table targets as address taken.
- const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
- const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
- for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
- MachineBasicBlock *MBB = JTBBs[i];
- MBB->setHasAddressTaken();
- // This line is needed to set the hasAddressTaken flag on the BasicBlock
- // object.
- BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
- }
-
- SDValue JumpTableBase = DAG.getNode(
- HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT);
- SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
- DAG.getConstant(2, dl, MVT::i32));
- SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
- ShiftIndex);
- SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
- MachinePointerInfo(), false, false, false,
- 0);
- return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
-}
-
-
SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
@@ -850,7 +1015,10 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue AC = DAG.getConstant(A, dl, MVT::i32);
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
- return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+ if (Op.getNode()->getHasDebugValue())
+ DAG.TransferDbgValues(Op, AA);
+ return AA;
}
SDValue
@@ -882,7 +1050,8 @@ const {
// equal to) 8 bytes. If not, no address will be passed into callee and
// the callee returns the result directly through R0/R1.
- SmallVector<SDValue, 4> MemOps;
+ SmallVector<SDValue, 8> MemOps;
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -908,6 +1077,42 @@ const {
RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Single Vector
+ } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
+ RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+ // Double Vector
+ } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+ RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (UseHVX && UseHVXDbl &&
+ ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
+ RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
+ assert(0 && "need to support VecPred regs");
+ unsigned VReg =
+ RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
} else {
assert (0);
}
@@ -1056,8 +1261,8 @@ SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
- const {
+SDValue
+HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue PredOp = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
EVT OpVT = Op1.getValueType();
@@ -1163,16 +1368,33 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
- SDLoc dl(Op);
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- SDValue Res;
- if (CP->isMachineConstantPoolEntry())
- Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy,
- CP->getAlignment());
+ ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
+ unsigned Align = CPN->getAlignment();
+ Reloc::Model RM = HTM.getRelocationModel();
+ unsigned char TF = (RM == Reloc::PIC_) ? HexagonII::MO_PCREL : 0;
+
+ SDValue T;
+ if (CPN->isMachineConstantPoolEntry())
+ T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF);
else
- Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
- CP->getAlignment());
- return DAG.getNode(HexagonISD::CP, dl, ValTy, Res);
+ T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF);
+ if (RM == Reloc::PIC_)
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
+ return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
+}
+
+SDValue
+HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int Idx = cast<JumpTableSDNode>(Op)->getIndex();
+ Reloc::Model RM = HTM.getRelocationModel();
+ if (RM == Reloc::PIC_) {
+ SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
+ }
+
+ SDValue T = DAG.getTargetJumpTable(Idx, VT);
+ return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}
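LowerJumpTable is only reached because the constructor (in a later hunk of this file) marks ISD::JumpTable as Custom for i32, and LowerOperation dispatches the node here. The wiring, in outline:

    // Constructor side: request custom lowering for jump tables.
    setOperationAction(ISD::JumpTable, MVT::i32, Custom);

    // LowerOperation side: route the node to the routine above (this case is
    // added by the LowerOperation hunk further down in this file's diff).
    switch (Op.getOpcode()) {
    case ISD::JumpTable: return LowerJumpTable(Op, DAG);
    // ... other custom-lowered nodes ...
    }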
SDValue
@@ -1219,52 +1441,70 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
- SelectionDAG& DAG) const {
+SDValue
+HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
SDLoc dl(Op);
return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}
-SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue Result;
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+SDValue
+HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
+ auto *GAN = cast<GlobalAddressSDNode>(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+ auto *GV = GAN->getGlobal();
+ int64_t Offset = GAN->getOffset();
+
+ auto &HLOF = *HTM.getObjFileLowering();
+ Reloc::Model RM = HTM.getRelocationModel();
- const HexagonTargetObjectFile *TLOF =
- static_cast<const HexagonTargetObjectFile *>(
- getTargetMachine().getObjFileLowering());
- if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) {
- return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result);
+ if (RM == Reloc::Static) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+ if (HLOF.IsGlobalInSmallSection(GV, HTM))
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
+ return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
}
- return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result);
+ bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() ||
+ (GV->hasLocalLinkage() && !isa<Function>(GV));
+ if (UsePCRel) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
+ HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
+ }
+
+ // Use GOT index.
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
+ SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
+ return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}
// Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
-void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
- if (VT != PromotedLdStVT) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+SDValue
+HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ SDLoc dl(Op);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
- PromotedLdStVT.getSimpleVT());
+ Reloc::Model RM = HTM.getRelocationModel();
+ if (RM == Reloc::Static) {
+ SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
}
+
+ SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}
SDValue
-HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32);
- SDLoc dl(Op);
- return DAG.getNode(HexagonISD::CONST32_GP, dl,
- getPointerTy(DAG.getDataLayout()), BA_SD);
+HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
+ const {
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
+ HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}
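For PIC, LowerGLOBALADDRESS above splits globals into two classes: symbols known to bind locally are addressed PC-relative, everything else goes through the GOT. The classification, pulled out as a standalone predicate for clarity (same logic as the hunk; the helper name is hypothetical):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalValue.h"
    using namespace llvm;

    // A global can be reached PC-relative (no GOT slot needed) when it cannot
    // be preempted: internal linkage, hidden visibility, or local linkage for
    // non-function objects.
    static bool canUsePCRelative(const GlobalValue *GV) {
      return GV->hasInternalLinkage() || GV->hasHiddenVisibility() ||
             (GV->hasLocalLinkage() && !isa<Function>(GV));
    }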
//===----------------------------------------------------------------------===//
@@ -1272,18 +1512,19 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
//===----------------------------------------------------------------------===//
HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
- const HexagonSubtarget &STI)
+ const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
- Subtarget(STI) {
+ Subtarget(ST) {
bool IsV4 = !Subtarget.hasV5TOps();
auto &HRI = *Subtarget.getRegisterInfo();
+ bool UseHVX = Subtarget.useHVXOps();
+ bool UseHVXSgl = Subtarget.useHVXSglOps();
+ bool UseHVXDbl = Subtarget.useHVXDblOps();
setPrefLoopAlignment(4);
setPrefFunctionAlignment(4);
setMinFunctionAlignment(2);
setInsertFencesForAtomic(false);
- setExceptionPointerRegister(Hexagon::R0);
- setExceptionSelectorRegister(Hexagon::R1);
setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
if (EnableHexSDNodeSched)
@@ -1320,6 +1561,31 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
+ if (Subtarget.hasV60TOps()) {
+ if (Subtarget.useHVXSglOps()) {
+ addRegisterClass(MVT::v64i8, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v8i64, &Hexagon::VectorRegsRegClass);
+ addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass);
+ addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass);
+ } else if (Subtarget.useHVXDblOps()) {
+ addRegisterClass(MVT::v128i8, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v16i64, &Hexagon::VectorRegs128BRegClass);
+ addRegisterClass(MVT::v256i8, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v64i32, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass);
+ addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass);
+ }
+
+ }
+
//
// Handling of scalar operations.
//
@@ -1336,10 +1602,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+ setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom legalize GlobalAddress nodes into CONST32.
@@ -1361,11 +1629,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
if (EmitJumpTables)
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ setMinimumJumpTableEntries(2);
else
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- // Increase jump tables cutover to 5, was 4.
- setMinimumJumpTableEntries(MinimumJumpTables);
+ setMinimumJumpTableEntries(MinimumJumpTables);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
// Hexagon has instructions for add/sub with carry. The problem with
// modeling these instructions is that they produce 2 results: Rdd and Px.
@@ -1420,9 +1687,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::i64, Expand);
for (unsigned IntExpOp :
- {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM,
- ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS,
- ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+ { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
+ ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
setOperationAction(IntExpOp, MVT::i32, Expand);
setOperationAction(IntExpOp, MVT::i64, Expand);
}
@@ -1475,7 +1743,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Set the action for vector operations to "expand", then override it with
// either "custom" or "legal" for specific cases.
- static unsigned VectExpOps[] = {
+ static const unsigned VectExpOps[] = {
// Integer arithmetic:
ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
@@ -1539,7 +1807,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
-
+ if (UseHVX) {
+ if (UseHVXSgl) {
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom);
+ } else if (UseHVXDbl) {
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom);
+ } else {
+ llvm_unreachable("Unrecognized HVX mode");
+ }
+ }
// Subtarget-specific operation actions.
//
if (Subtarget.hasV5TOps()) {
@@ -1586,7 +1868,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
for (ISD::CondCode FPExpCCV4 :
{ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
- ISD::SETUO, ISD::SETO}) {
+ ISD::SETUO, ISD::SETO}) {
setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
}
@@ -1599,6 +1881,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
}
+ if (UseHVXDbl) {
+ for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
+ setIndexedLoadAction(ISD::POST_INC, VT, Legal);
+ setIndexedStoreAction(ISD::POST_INC, VT, Legal);
+ }
+ }
+
computeRegisterProperties(&HRI);
//
@@ -1720,7 +2009,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
- case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
case HexagonISD::CALLR: return "HexagonISD::CALLR";
case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
@@ -1737,7 +2025,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
- case HexagonISD::PIC_ADD: return "HexagonISD::PIC_ADD";
case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
@@ -1754,6 +2041,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ";
case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
+ case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB";
@@ -1923,8 +2211,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned Size = VT.getSizeInBits();
- // A vector larger than 64 bits cannot be represented in Hexagon.
- // Expand will split the vector.
+ // Only handle vectors of 64 bits or shorter.
if (Size > 64)
return SDValue();
@@ -2058,58 +2345,61 @@ SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
+ bool UseHVX = Subtarget.useHVXOps();
EVT VT = Op.getValueType();
unsigned NElts = Op.getNumOperands();
- SDValue Vec = Op.getOperand(0);
- EVT VecVT = Vec.getValueType();
- SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64);
- SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
- DAG.getConstant(32, dl, MVT::i64));
- SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64);
-
- ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
- ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
-
- if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
- if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
- // We are trying to concat two v2i16 to a single v4i16.
- SDValue Vec0 = Op.getOperand(1);
- SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
- return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+ SDValue Vec0 = Op.getOperand(0);
+ EVT VecVT = Vec0.getValueType();
+ unsigned Width = VecVT.getSizeInBits();
+
+ if (NElts == 2) {
+ MVT ST = VecVT.getSimpleVT();
+ // We are trying to concat two v2i16 to a single v4i16, or two v4i8
+ // into a single v8i8.
+ if (ST == MVT::v2i16 || ST == MVT::v4i8)
+ return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0);
+
+ if (UseHVX) {
+ assert((Width == 64*8 && Subtarget.useHVXSglOps()) ||
+ (Width == 128*8 && Subtarget.useHVXDblOps()));
+ SDValue Vec1 = Op.getOperand(1);
+ MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32;
+ MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32;
+ SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0);
+ SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1);
+ SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0);
+ return DAG.getNode(ISD::BITCAST, dl, VT, VC);
}
}
- if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
- if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
- // We are trying to concat two v4i8 to a single v8i8.
- SDValue Vec0 = Op.getOperand(1);
- SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
- return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
- }
- }
+ if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64)
+ return SDValue();
+
+ SDValue C0 = DAG.getConstant(0, dl, MVT::i64);
+ SDValue C32 = DAG.getConstant(32, dl, MVT::i64);
+ SDValue W = DAG.getConstant(Width, dl, MVT::i64);
+ // Create the "width" part of the argument to insert_rp/insertp_rp.
+ SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32);
+ SDValue V = C0;
for (unsigned i = 0, e = NElts; i != e; ++i) {
- unsigned OpIdx = NElts - i - 1;
- SDValue Operand = Op.getOperand(OpIdx);
+ unsigned N = NElts-i-1;
+ SDValue OpN = Op.getOperand(N);
- if (VT.getSizeInBits() == 64 &&
- Operand.getValueType().getSizeInBits() == 32) {
+ if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) {
SDValue C = DAG.getConstant(0, dl, MVT::i32);
- Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+ OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
}
-
- SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
- SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
- SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
- const SDValue Ops[] = {ConstVal, Operand, Combined};
-
+ SDValue Idx = DAG.getConstant(N, dl, MVT::i64);
+ SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
if (VT.getSizeInBits() == 32)
- ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+ V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
else
- ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+ V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
}
- return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+ return DAG.getNode(ISD::BITCAST, dl, VT, V);
}
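The rewritten scalar CONCAT_VECTORS path drives the INSERTRP nodes with a 64-bit control value: element width in bits in the high word, destination bit offset in the low word. The encoding, as plain arithmetic outside the DAG:

    #include <cstdint>

    // Control word for inserting element 'Index' of width 'WidthBits':
    // high 32 bits = width, low 32 bits = Index * width (the bit offset).
    static uint64_t insertControlWord(uint64_t WidthBits, uint64_t Index) {
      return (WidthBits << 32) | (Index * WidthBits);
    }

    // Example: concatenating two 32-bit halves into a 64-bit value uses
    // (32 << 32) | 0 for element 0 and (32 << 32) | 32 for element 1.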
SDValue
@@ -2301,6 +2591,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SHL:
case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
// Frame & Return address. Currently unimplemented.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
@@ -2308,8 +2599,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::BR_JT: return LowerBR_JT(Op, DAG);
// Custom lower some vector loads.
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
@@ -2321,6 +2612,16 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
}
+/// Returns relocation base for the given PIC jumptable.
+SDValue
+HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ int Idx = cast<JumpTableSDNode>(Table)->getIndex();
+ EVT VT = Table.getValueType();
+ SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+ return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
+}
+
MachineBasicBlock *
HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB)
@@ -2343,6 +2644,8 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
std::pair<unsigned, const TargetRegisterClass *>
HexagonTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+ bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
+
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r': // R0-R31
@@ -2358,6 +2661,42 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
case MVT::f64:
return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
}
+ case 'q': // q0-q3
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v1024i1:
+ case MVT::v512i1:
+ case MVT::v32i16:
+ case MVT::v16i32:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
+ }
+ case 'v': // V0-V31
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
+ case MVT::v32i32:
+ case MVT::v64i16:
+ case MVT::v16i64:
+ case MVT::v128i8:
+ if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
+ return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
+ else
+ return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
+ case MVT::v256i8:
+ case MVT::v128i16:
+ case MVT::v64i32:
+ case MVT::v32i64:
+ return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
+ }
+
default:
llvm_unreachable("Unknown asm register class");
}
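The new 'v' and 'q' constraints let HVX values appear directly in inline assembly once the types above map onto vector register classes. A hedged user-side sketch, assuming HVX is enabled at compile time; the generic-vector typedef and the vor mnemonic are illustrative, so consult the HVX documentation for exact syntax:

    // 64-byte HVX vector modeled as a generic vector type (single mode).
    typedef int hvx_vec __attribute__((__vector_size__(64)));

    static hvx_vec vec_or(hvx_vec a, hvx_vec b) {
      hvx_vec r;
      // "v" asks for HVX vector registers (V0-V31), as wired up by the
      // constraint handling above.
      __asm__("%0 = vor(%1,%2)" : "=v"(r) : "v"(a), "v"(b));
      return r;
    }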
@@ -2397,6 +2736,14 @@ bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
}
+/// Return true if folding a constant offset with the given GlobalAddress is
+/// legal. It is frequently not legal in PIC relocation models.
+bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
+ const {
+ return HTM.getRelocationModel() == Reloc::Static;
+}
+
+
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
@@ -2428,8 +2775,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
// ***************************************************************************
// If this is a tail call via a function pointer, then don't do it!
- if (!(dyn_cast<GlobalAddressSDNode>(Callee))
- && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+ if (!(isa<GlobalAddressSDNode>(Callee)) &&
+ !(isa<ExternalSymbolSDNode>(Callee))) {
return false;
}
@@ -2467,6 +2814,41 @@ bool llvm::isPositiveHalfWord(SDNode *N) {
}
}
+std::pair<const TargetRegisterClass*, uint8_t>
+HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
+ MVT VT) const {
+ const TargetRegisterClass *RRC = nullptr;
+
+ uint8_t Cost = 1;
+ switch (VT.SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(TRI, VT);
+ case MVT::v64i8:
+ case MVT::v32i16:
+ case MVT::v16i32:
+ case MVT::v8i64:
+ RRC = &Hexagon::VectorRegsRegClass;
+ break;
+ case MVT::v128i8:
+ case MVT::v64i16:
+ case MVT::v32i32:
+ case MVT::v16i64:
+ if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() &&
+ Subtarget.useHVXDblOps())
+ RRC = &Hexagon::VectorRegs128BRegClass;
+ else
+ RRC = &Hexagon::VecDblRegsRegClass;
+ break;
+ case MVT::v256i8:
+ case MVT::v128i16:
+ case MVT::v64i32:
+ case MVT::v32i64:
+ RRC = &Hexagon::VecDblRegs128BRegClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
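
The switch above groups HVX types purely by total width: 512-bit values fit one vector register, 1024-bit values need a register pair unless the 128-byte double-vector mode makes that the native single-register width, and 2048-bit values are always a pair in that mode. A standalone restatement of the classification (class names are descriptive strings, not the LLVM register-class objects):

    #include <string>

    // UseHVXDbl mirrors the V60 + HVX + 128-byte checks in the patch.
    std::string hvxClassForWidth(unsigned WidthInBits, bool UseHVXDbl) {
      if (WidthInBits == 512)
        return "VectorRegs";                    // one 64-byte vector register
      if (WidthInBits == 1024)
        return UseHVXDbl ? "VectorRegs128B"     // single register in 128B mode
                         : "VecDblRegs";        // otherwise a 64-byte pair
      if (WidthInBits == 2048)
        return "VecDblRegs128B";                // a pair of 128-byte registers
      return "default";                         // defer to the generic handling
    }
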
Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
BasicBlock *BB = Builder.GetInsertBlock();
@@ -2498,13 +2880,15 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
return Ext;
}
-bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
// Do not expand loads and stores that don't exceed 64 bits.
- return LI->getType()->getPrimitiveSizeInBits() > 64;
+ return LI->getType()->getPrimitiveSizeInBits() > 64
+ ? AtomicExpansionKind::LLOnly
+ : AtomicExpansionKind::None;
}
bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Do not expand loads and stores that don't exceed 64 bits.
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
}
-
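
The signature change replaces the old boolean with AtomicExpansionKind: atomic loads up to 64 bits stay native, wider ones are expanded in IR around a single load-locked operation (LLOnly), and read-modify-write operations keep the full load-locked/store-conditional loop (LLSC, visible in the header hunk below). A compact restatement of the load decision, assuming only what this hunk shows:

    enum class AtomicExpansionKind { None, LLSC, LLOnly };

    // Mirrors shouldExpandAtomicLoadInIR: expand only accesses wider than the
    // 64-bit native atomic width.
    AtomicExpansionKind expandKindForAtomicLoad(unsigned SizeInBits) {
      return SizeInBits > 64 ? AtomicExpansionKind::LLOnly
                             : AtomicExpansionKind::None;
    }
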
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 2642abf..bf378b9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -35,16 +35,14 @@ bool isPositiveHalfWord(SDNode *N);
ALLOCA,
ARGEXTEND,
- PIC_ADD,
- AT_GOT,
- AT_PCREL,
+ AT_GOT, // Index in GOT.
+ AT_PCREL, // Offset relative to PC.
CALLv3, // A V3+ call instruction.
CALLv3nr, // A V3+ call instruction that doesn't return.
CALLR,
RET_FLAG, // Return with a flag operand.
- BR_JT, // Branch through jump table.
BARRIER, // Memory barrier.
JT, // Jump table.
CP, // Constant pool.
@@ -80,6 +78,7 @@ bool isPositiveHalfWord(SDNode *N);
INSERTRP,
EXTRACTU,
EXTRACTURP,
+ VCOMBINE,
TC_RETURN,
EH_RETURN,
DCFETCH,
@@ -127,7 +126,6 @@ bool isPositiveHalfWord(SDNode *N);
SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
@@ -137,6 +135,7 @@ bool isPositiveHalfWord(SDNode *N);
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
@@ -163,8 +162,23 @@ bool isPositiveHalfWord(SDNode *N);
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return Hexagon::R1;
+ }
+
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
EVT getSetCCResultType(const DataLayout &, LLVMContext &C,
EVT VT) const override {
if (!VT.isVector())
@@ -200,6 +214,10 @@ bool isPositiveHalfWord(SDNode *N);
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
+ /// Return true if folding a constant offset with the given GlobalAddress
+ /// is legal. It is frequently not legal in PIC relocation models.
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -208,20 +226,26 @@ bool isPositiveHalfWord(SDNode *N);
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
+ /// Returns relocation base for the given PIC jumptable.
+ SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG)
+ const override;
+
// Handling of atomic RMW instructions.
- bool hasLoadLinkedStoreConditional() const override {
- return true;
- }
Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Value *Addr, AtomicOrdering Ord) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+ AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI)
- const override {
- return AtomicRMWExpansionKind::LLSC;
+ AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {
+ return AtomicExpansionKind::LLSC;
}
+
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
+ const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
new file mode 100644
index 0000000..5a1a69b
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
@@ -0,0 +1,462 @@
+//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Mappings
+//===----------------------------------------------------------------------===//
+
+
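
Each record in this new file is a TableGen InstAlias: it gives the assembler (and, when the trailing flag is nonzero, the instruction printer) an alternate spelling that expands to an existing instruction with a fixed operand list. A rough standalone model of what one such record contributes at the MC level, purely illustrative and not the LLVM API:

    #include <string>
    #include <vector>

    struct FakeMCInst {
      std::string Opcode;
      std::vector<std::string> Operands;
    };

    // Parsing "memw(r3) = r5" yields the same instruction the canonical
    // "memw(r3+#0) = r5" would: S2_storeri_io with an explicit zero offset,
    // exactly as the corresponding alias further down declares.
    FakeMCInst expandStoreWordAlias(const std::string &BaseReg,
                                    const std::string &SrcReg) {
      return {"S2_storeri_io", {BaseReg, "#0", SrcReg}};
    }
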
+def : InstAlias<"memb({GP}+#$addr) = $Nt.new",
+ (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt.new",
+ (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memw({GP}+#$addr) = $Nt.new",
+ (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memb({GP}+#$addr) = $Nt",
+ (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt",
+ (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memh({GP}+#$addr) = $Nt.h",
+ (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memw({GP}+#$addr) = $Nt",
+ (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>;
+def : InstAlias<"memd({GP}+#$addr) = $Nt",
+ (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>;
+
+def : InstAlias<"$Nt = memb({GP}+#$addr)",
+ (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>;
+def : InstAlias<"$Nt = memub({GP}+#$addr)",
+ (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>;
+def : InstAlias<"$Nt = memh({GP}+#$addr)",
+ (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>;
+def : InstAlias<"$Nt = memuh({GP}+#$addr)",
+ (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>;
+def : InstAlias<"$Nt = memw({GP}+#$addr)",
+ (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>;
+def : InstAlias<"$Nt = memd({GP}+#$addr)",
+ (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>;
+
+// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt
+def : InstAlias<"memb($Rs) = $Rt",
+ (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt",
+ (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt.h",
+ (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memw($Rs) = $Rt",
+ (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memb($Rs) = $Rt.new",
+ (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memh($Rs) = $Rt.new",
+ (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memw($Rs) = $Rt.new",
+ (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"memb($Rs) = #$S8",
+ (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memh($Rs) = #$S8",
+ (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memw($Rs) = #$S8",
+ (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+
+def : InstAlias<"memd($Rs) = $Rtt",
+ (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"memb($Rs) = setbit(#$U5)",
+ (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memh($Rs) = setbit(#$U5)",
+ (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memw($Rs) = setbit(#$U5)",
+ (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memb($Rs) = clrbit(#$U5)",
+ (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memh($Rs) = clrbit(#$U5)",
+ (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+def : InstAlias<"memw($Rs) = clrbit(#$U5)",
+ (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+
+// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs)
+def : InstAlias<"$Rd = memb($Rs)",
+ (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memub($Rs)",
+ (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memh($Rs)",
+ (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memuh($Rs)",
+ (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memw($Rs)",
+ (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memd($Rs)",
+ (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = memubh($Rs)",
+ (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memubh($Rs)",
+ (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rd = membh($Rs)",
+ (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = membh($Rs)",
+ (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memb_fifo($Rs)",
+ (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"$Rdd = memh_fifo($Rs)",
+ (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X)
+// to: if ($Pt) $Rd = memXX($Rs)
+def : InstAlias<"if ($Pt) $Rd = memb($Rs)",
+ (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memub($Rs)",
+ (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memh($Rs)",
+ (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memuh($Rs)",
+ (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rd = memw($Rs)",
+ (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt) $Rdd = memd($Rs)",
+ (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt
+// to: if ($Pt) memXX($Rs) = $Rt
+def : InstAlias<"if ($Pt) memb($Rs) = $Rt",
+ (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt",
+ (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h",
+ (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = $Rt",
+ (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memd($Rs) = $Rtt",
+ (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new",
+ (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new",
+ (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new",
+ (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new",
+ (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new",
+ (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new",
+ (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+
+// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X)
+// to: if (!$Pt) $Rd = memXX($Rs)
+def : InstAlias<"if (!$Pt) $Rd = memb($Rs)",
+ (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memub($Rs)",
+ (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memh($Rs)",
+ (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)",
+ (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rd = memw($Rs)",
+ (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)",
+ (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt
+// to: if (!$Pt) memXX($Rs) = $Rt
+def : InstAlias<"if (!$Pt) memb($Rs) = $Rt",
+ (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt",
+ (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h",
+ (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = $Rt",
+ (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt",
+ (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new",
+ (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new",
+ (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new",
+ (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new",
+ (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new",
+ (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new",
+ (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pt) memb($Rs) = #$S6",
+ (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt) memh($Rs) = #$S6",
+ (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt) memw($Rs) = #$S6",
+ (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6",
+ (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6",
+ (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6",
+ (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memb($Rs) = #$S6",
+ (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memh($Rs) = #$S6",
+ (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt) memw($Rs) = #$S6",
+ (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6",
+ (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6",
+ (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6",
+ (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+
+// Alias of: memXX($Rs + #$u6_X) |= $Rt, also &=, +=, -=
+// to: memXX($Rs) |= $Rt
+def : InstAlias<"memb($Rs) &= $Rt",
+ (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) |= $Rt",
+ (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) += $Rt",
+ (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) -= $Rt",
+ (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) += #$U5",
+ (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memb($Rs) -= #$U5",
+ (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) &= $Rt",
+ (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) |= $Rt",
+ (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) += $Rt",
+ (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) -= $Rt",
+ (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) += #$U5",
+ (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memh($Rs) -= #$U5",
+ (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) &= $Rt",
+ (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) |= $Rt",
+ (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) += $Rt",
+ (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) -= $Rt",
+ (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) += #$U5",
+ (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+def : InstAlias<"memw($Rs) -= #$U5",
+ (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ Requires<[UseMEMOP]>;
+
+//
+// Alias of: if ($Pv.new) memX($Rs + #$u6_X) = $Rt
+// to: if ($Pv.new) memX($Rs) = $Rt
+def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt",
+ (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt",
+ (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h",
+ (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt",
+ (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt",
+ (S4_pstorerdtnew_io
+ PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt",
+ (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt",
+ (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h",
+ (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt",
+ (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
+
+def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt",
+ (S4_pstorerdfnew_io
+ PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
+
+//
+// Alias of: if ($Pt.new) $Rd = memub($Rs + #$u6_0) (and the if (!$Pt.new) forms)
+// to: if ($Pt.new) $Rd = memub($Rs)
+def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)",
+ (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)",
+ (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)",
+ (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)",
+ (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)",
+ (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)",
+ (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)",
+ (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)",
+ (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)",
+ (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)",
+ (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)",
+ (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)",
+ (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
+
+def : InstAlias<"dcfetch($Rs)",
+ (Y2_dcfetchbo IntRegs:$Rs, 0), 0>;
+
+// Aliases of some instruction mappings; the rest must be handled by the parser.
+def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
+ (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
+ (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+
+// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs)
+def : InstAlias<"$Rd = neg($Rs)",
+ (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>;
+
+def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
+def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
+def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
+def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
+
+def : InstAlias<"$Pd = $Ps",
+ (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>;
+
+def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)",
+ (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>;
+
+def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)",
+ (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>;
+
+def : InstAlias<"$Rd = mpyui($Rs,$Rt)",
+ (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>;
+
+// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a)
+def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
+ (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
+ (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td
new file mode 100644
index 0000000..280832f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrEnc.td
@@ -0,0 +1,1019 @@
+class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { opc{14-4}, src2};
+ let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst};
+}
+
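
The class above scatters a 15-bit opcode across the two halves of the instruction word and interleaves it with the three 5-bit register fields. A standalone packer that follows the two "let Inst{...}" assignments bit for bit (bits 15-14 come from the OpcodeHexagon base class and are left at zero here):

    #include <cstdint>

    // Pack one Enc_COPROC_VX_3op_v word: opc is 15 bits, dst/src1/src2 are
    // 5-bit register numbers.
    uint32_t encodeVX3op(uint16_t opc, uint8_t dst, uint8_t src1, uint8_t src2) {
      uint32_t Inst = 0;
      Inst |= uint32_t((opc >> 4) & 0x7FF) << 21;  // opc{14-4} -> Inst{31-21}
      Inst |= uint32_t(src2 & 0x1F)        << 16;  // src2{4-0} -> Inst{20-16}
      Inst |= uint32_t((opc >> 3) & 0x1)   << 13;  // opc{3}    -> Inst{13}
      Inst |= uint32_t(src1 & 0x1F)        << 8;   // src1{4-0} -> Inst{12-8}
      Inst |= uint32_t(opc & 0x7)          << 5;   // opc{2-0}  -> Inst{7-5}
      Inst |= uint32_t(dst & 0x1F);                // dst{4-0}  -> Inst{4-0}
      return Inst;
    }
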
+class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>;
+class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>;
+class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>;
+class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>;
+class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>;
+class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>;
+class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>;
+class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>;
+class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>;
+class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>;
+class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>;
+class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>;
+class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>;
+class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>;
+class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>;
+class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>;
+class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>;
+class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>;
+class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>;
+class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>;
+class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>;
+class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>;
+class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>;
+class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>;
+class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>;
+class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>;
+class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>;
+class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>;
+class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>;
+class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>;
+class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>;
+class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>;
+class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>;
+class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>;
+class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>;
+class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>;
+class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>;
+class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>;
+class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>;
+class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>;
+class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>;
+class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>;
+class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>;
+class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>;
+class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>;
+class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>;
+class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>;
+class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>;
+class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>;
+class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>;
+class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>;
+class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>;
+class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>;
+class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>;
+class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>;
+class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>;
+class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>;
+class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>;
+class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>;
+class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>;
+class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>;
+class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>;
+class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>;
+class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>;
+class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>;
+class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>;
+class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>;
+class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>;
+class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>;
+class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>;
+class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>;
+class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>;
+class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>;
+class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>;
+class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>;
+class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>;
+class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>;
+class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>;
+class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>;
+class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>;
+class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>;
+class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>;
+class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>;
+class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>;
+class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>;
+class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>;
+class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>;
+class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>;
+class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>;
+class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>;
+class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>;
+class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>;
+class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>;
+class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>;
+class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>;
+class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>;
+class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>;
+class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>;
+class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>;
+class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>;
+class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>;
+class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>;
+class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>;
+class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>;
+class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>;
+class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>;
+class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>;
+class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>;
+class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>;
+class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>;
+class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>;
+class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>;
+class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>;
+class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>;
+class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>;
+class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>;
+class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>;
+class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>;
+class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>;
+class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>;
+class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>;
+class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>;
+class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>;
+class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>;
+class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>;
+class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>;
+class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>;
+class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>;
+class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>;
+class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>;
+class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>;
+class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>;
+class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>;
+class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>;
+class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>;
+class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>;
+class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>;
+class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>;
+class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>;
+class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>;
+class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>;
+class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>;
+class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>;
+class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>;
+class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>;
+class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>;
+class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>;
+class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>;
+class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>;
+class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>;
+class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>;
+class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>;
+class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>;
+class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>;
+class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>;
+class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>;
+class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>;
+class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>;
+class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>;
+class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>;
+class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>;
+class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>;
+class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>;
+class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>;
+class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>;
+class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>;
+class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>;
+class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>;
+class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>;
+class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>;
+class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>;
+class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>;
+class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>;
+class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>;
+class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>;
+class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>;
+class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>;
+class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>;
+class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>;
+class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>;
+class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>;
+class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>;
+class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>;
+class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>;
+class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>;
+
+class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} };
+ let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} };
+}
+
+class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>;
+class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>;
+class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>;
+class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>;
+class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>;
+class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>;
+class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>;
+class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>;
+class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>;
+class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>;
+class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>;
+class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>;
+class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>;
+class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>;
+class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>;
+class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>;
+class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>;
+class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>;
+class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>;
+class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>;
+class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>;
+class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>;
+class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>;
+class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>;
+class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>;
+class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>;
+class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>;
+class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>;
+class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>;
+class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>;
+class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>;
+class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>;
+class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>;
+class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>;
+class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>;
+class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>;
+class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>;
+class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>;
+
+class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} };
+ let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>;
+class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>;
+class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>;
+class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>;
+class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>;
+class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>;
+class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>;
+class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>;
+class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>;
+class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>;
+class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>;
+class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>;
+
+class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { 0b00011110000000, opc{5-4} };
+ let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>;
+class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>;
+class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>;
+class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>;
+class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>;
+class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>;
+class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>;
+class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>;
+class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>;
+class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>;
+class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>;
+class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>;
+class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>;
+class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>;
+class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>;
+class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>;
+class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>;
+class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>;
+class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>;
+class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>;
+class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>;
+class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>;
+class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>;
+class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>;
+class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>;
+
+class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
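
Only bits 9-6 of the 10-bit byte offset are encoded, i.e. the field is the offset counted in whole 64-byte vectors, with the low six bits implied zero by alignment; the _128B variant just below does the same with bits 10-7, scaling by 128. A small sketch of that field extraction, assuming an already vector-aligned byte offset:

    #include <cassert>
    #include <cstdint>

    // Mirrors 'let src2_vector = src2{9-6}': keep the byte offset divided by
    // the vector size in a 4-bit field.  Pass VecBytes = 128 for the _128B
    // encodings (bits 10-7).
    uint32_t vectorOffsetField(int32_t ByteOffset, unsigned VecBytes = 64) {
      assert(ByteOffset % int32_t(VecBytes) == 0 && "offset must be vector aligned");
      return uint32_t(ByteOffset / int32_t(VecBytes)) & 0xF;  // 4-bit field
    }
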
+class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>;
+class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>;
+class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>;
+class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>;
+class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>;
+class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>;
+class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>;
+
+class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>;
+class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>;
+class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>;
+class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>;
+class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>;
+class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>;
+class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>;
+
+class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>;
+class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>;
+class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>;
+
+class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>;
+class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>;
+class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<4> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{9-6};
+ let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>;
+class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<11> src2;
+ bits<4> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{10-7};
+ let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>;
+class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<4> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{9-6};
+ let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<11> src3;
+ bits<4> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{10-7};
+ let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>;
+class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>;
+class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>;
+class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>;
+class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>;
+class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>;
+class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>;
+class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>;
+class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>;
+class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>;
+
+class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>;
+class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>;
+class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>;
+class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>;
+class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>;
+class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>;
+class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>;
+class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>;
+class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>;
+class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<4> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{9-6};
+ let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>;
+class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>;
+class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>;
+class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<11> src3;
+ bits<4> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{10-7};
+ let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>;
+class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>;
+class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>;
+class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>;
+
+// TODO: Change script to generate dst, src1, src2 instead of
+// dst, dst2, src1.
+class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>;
+class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>;
+class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>;
+class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>;
+class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>;
+class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>;
+class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>;
+
+class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>;
+class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>;
+class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>;
+class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>;
+class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>;
+class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>;
+class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>;
+
+
+// TODO: Change script to generate src1, src2 and src3 instead of
+// dst, src1, src2.
+class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
+ let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>;
+class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>;
+class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>;
+
+class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+ bits<5> src3;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
+ let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>;
+class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>;
+class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>;
+
+// TODO: Change script to generate src1, src2 and src3 instead of
+// dst, src1, src2.
+class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<9> src2;
+ bits<3> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{8-6};
+ let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>;
+class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<10> src2;
+ bits<3> src2_vector;
+ bits<3> src3;
+
+ let src2_vector = src2{9-7};
+ let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
+}
+
+class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>;
+class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>;
+
+// TODO: Change script to generate src1, src2,src3 and src4 instead of
+// dst, src1, src2, src3.
+class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{8-6};
+ let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>;
+class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>;
+class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>;
+class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>;
+class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>;
+class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>;
+class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>;
+class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>;
+class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>;
+class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>;
+
+// TODO: Change script to generate src1, src2,src3 and src4 instead of
+// dst, src1, src2, src3.
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<3> src3_vector;
+ bits<5> src4;
+
+ let src3_vector = src3{9-7};
+ let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>;
+class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>;
+class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>;
+class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>;
+class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>;
+class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>;
+class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>;
+class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>;
+class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>;
+class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<9> src3;
+ bits<3> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{8-6};
+ let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>;
+class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>;
+class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>;
+class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<10> src3;
+ bits<3> src3_vector;
+ bits<3> src4;
+
+ let src3_vector = src3{9-7};
+ let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>;
+class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>;
+class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>;
+class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>;
+
+class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<1> src2;
+
+ let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} };
+}
+
+class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>;
+class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>;
+class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>;
+class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>;
+class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>;
+class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>;
+class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>;
+
+class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<1> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} };
+}
+
+class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>;
+class V6_vS32Ub_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0111>;
+class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<1> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} };
+ let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} };
+}
+
+class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>;
+class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<1> src3;
+ bits<5> src4;
+
+ let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} };
+ let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>;
+class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>;
+class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>;
+class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>;
+class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>;
+class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>;
+class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>;
+class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>;
+class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>;
+class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> src2;
+ bits<1> src3;
+ bits<3> src4;
+
+ let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} };
+ let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>;
+class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>;
+class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>;
+class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>;
+
+
+class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<1> src3;
+
+ let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} };
+ let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} };
+}
+
+class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>;
+class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>;
+class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>;
+class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>;
+class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>;
+class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>;
+
+class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} };
+ let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} };
+}
+
+class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>;
+class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>;
+
+class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon {
+ bits<5> src1;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b00011001111, src3{4-0} };
+ let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} };
+}
+
+class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>;
+class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>;
+
+
+class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b0001101000, opc{0}, 0b00000 };
+ let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>;
+class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>;
+
+class Enc_X_p3op<bits<8> opc> : OpcodeHexagon {
+ bits<2> src1;
+ bits<5> dst;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} };
+ let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} };
+}
+
+class V6_vnccombine_enc : Enc_X_p3op<0b00001000>;
+class V6_vccombine_enc : Enc_X_p3op<0b00001100>;
+
+class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} };
+ let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>;
+class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>;
+class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>;
+class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>;
+class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>;
+class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>;
+class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>;
+class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>;
+class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>;
+class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>;
+class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>;
+class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>;
+class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>;
+class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>;
+class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>;
+
+class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<3> src3;
+
+ let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} };
+ let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} };
+}
+
+class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>;
+class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>;
+class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>;
+class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>;
+class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>;
+class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>;
+class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>;
+class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>;
+
+class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<2> src1;
+ bits<2> src2;
+
+ let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 };
+ let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} };
+}
+
+class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>;
+class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>;
+class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>;
+class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>;
+class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>;
+
+class V6_pred_not_enc : OpcodeHexagon {
+ bits<2> dst;
+ bits<2> src1;
+
+ let Inst{31-16} = { 0b0001111000000011 };
+ let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} };
+}
+
+class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<2> src1;
+ bits<5> src2;
+ bits<5> src3;
+
+ let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} };
+ let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>;
+class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>;
+
+class Enc_X_2op<bits<16> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{15-5}, src1{4-0} };
+ let Inst{13-0} = { opc{4-3}, 0b0000, opc{2-0}, dst{4-0} };
+}
+
+class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>;
+class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>;
+class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>;
+
+
+class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon {
+ bits<2> dst;
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} };
+ let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} };
+}
+
+class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>;
+class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>;
+
+class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<6> src2;
+
+ let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} };
+ let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>;
+class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>;
+class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>;
+class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>;
+class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>;
+class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>;
+
+class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { opc{14-4}, src1{4-0} };
+ let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>;
+class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>;
+class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>;
+class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>;
+class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>;
+class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>;
+class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>;
+class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>;
+
+class Enc_no_operands<bits<25> opc> : OpcodeHexagon {
+
+ let Inst{31-16} = { opc{24-10}, 0 };
+ let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 };
+}
+
+class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>;
+class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>;
+class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>;
+class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>;
+
+class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon {
+ bits<5> src1;
+
+ let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} };
+ let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 };
+}
+
+class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>;
+class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>;
+
+class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon {
+ bits<5> src1;
+
+ let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 };
+ let Inst{13-0} = { 0, src1{4-0}, 0b00000000 };
+}
+
+class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>;
+class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>;
+
+class A5_ACS_enc : OpcodeHexagon {
+ bits<5> dst1;
+ bits<2> dst2;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b11101010101, src1{4-0} };
+ let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
+
+class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon {
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<2> src3;
+
+ let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} };
+ let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} };
+}
+
+class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>;
+class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>;
+class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>;
+
+class V6_vhistq_enc : OpcodeHexagon {
+ bits<2> src1;
+
+ let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 };
+ let Inst{13-0} = { 0b10000010000000 };
+}
+
+// TODO: Change script to generate dst1 instead of dst.
+class A6_vminub_RdP_enc : OpcodeHexagon {
+ bits<5> dst1;
+ bits<2> dst2;
+ bits<5> src1;
+ bits<5> src2;
+
+ let Inst{31-16} = { 0b11101010111, src2{4-0} };
+ let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
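
The Enc_* classes above only fix the bit layout: each one slices an opcode parameter and the operand fields into Inst{31-16} and Inst{13-0}. As a rough illustration, the following standalone C++ sketch (function and variable names are hypothetical, not part of this patch) packs the fields of Enc_COPROC_VMEM_vS32_b_ppu the same way the TableGen concatenations do; bits 15-14 are left clear because the parse bits are filled in elsewhere.

#include <cassert>
#include <cstdint>

// Mirrors Enc_COPROC_VMEM_vS32_b_ppu<opc>:
//   Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} }
//   Inst{13-0}  = { src2{0}, 0b00000, opc{2-0}, src3{4-0} }
static uint32_t encodeVS32bPpu(unsigned opc, unsigned src1, unsigned src2,
                               unsigned src3) {
  assert(opc < 16 && src1 < 32 && src2 < 2 && src3 < 32);
  uint32_t hi = (0x056u << 7)               // Inst{31-23} = 0b001010110
              | (((opc >> 3) & 1u) << 6)    // Inst{22}    = opc{3}
              | (1u << 5)                   // Inst{21}    = 1
              | (src1 & 0x1Fu);             // Inst{20-16} = src1{4-0}
  uint32_t lo = ((src2 & 1u) << 13)         // Inst{13}    = src2{0}
              | ((opc & 0x7u) << 5)         // Inst{7-5}   = opc{2-0}
              | (src3 & 0x1Fu);             // Inst{4-0}   = src3{4-0}
  return (hi << 16) | lo;                   // Inst{15-14} stay 0 (parse bits)
}

Each V6_* subclass above then just supplies the four opc bits.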
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 44bab29..3c5ec17 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -34,6 +34,8 @@ class SubTarget<bits<6> value> {
def HasAnySubT : SubTarget<0x3f>; // 111111
def HasV5SubT : SubTarget<0x3e>; // 111110
+def HasV55SubT : SubTarget<0x3c>; // 111100
+def HasV60SubT : SubTarget<0x38>; // 111000
// Addressing modes for load/store instructions
class AddrModeType<bits<3> value> {
@@ -57,6 +59,8 @@ def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
+def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv)
+def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv)
//===----------------------------------------------------------------------===//
@@ -167,14 +171,23 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<1> isFP = 0;
let TSFlags {48} = isFP; // Floating-point.
+ bits<1> hasNewValue2 = 0;
+ let TSFlags{50} = hasNewValue2; // Second New-value producer insn.
+ bits<3> opNewValue2 = 0;
+ let TSFlags{53-51} = opNewValue2; // Second New-value produced operand.
+
+ bits<1> isAccumulator = 0;
+ let TSFlags{54} = isAccumulator;
+
// Fields used for relation models.
+ bit isNonTemporal = 0;
+ string isNT = ""; // Set to "true" for non-temporal vector stores.
string BaseOpcode = "";
string CextOpcode = "";
string PredSense = "";
string PNewValue = "";
string NValueST = ""; // Set to "true" for new-value stores.
string InputType = ""; // Input is "imm" or "reg" type.
- string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
string isFloat = "false"; // Set to "true" for the floating-point load/store.
string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions
@@ -182,6 +195,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
"");
let PNewValue = !if(isPredicatedNew, "new", "");
let NValueST = !if(isNVStore, "true", "false");
+ let isNT = !if(isNonTemporal, "true", "false");
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
@@ -217,6 +231,11 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
+let mayLoad = 1 in
+class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
@@ -234,6 +253,12 @@ class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
+// Same as ST0Inst but doesn't derive from OpcodeHexagon.
+let mayStore = 1 in
+class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
@@ -277,6 +302,11 @@ class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
OpcodeHexagon;
+// Same as above but doesn't derive from OpcodeHexagon
+class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
@@ -294,6 +324,10 @@ class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
OpcodeHexagon;
+class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
@@ -402,3 +436,13 @@ include "HexagonInstrFormatsV4.td"
//===----------------------------------------------------------------------===//
// V4 Instruction Format Definitions +
//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
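
The new InstHexagon fields above land in TSFlags at fixed bit positions (hasNewValue2 at bit 50, opNewValue2 at bits 53-51, isAccumulator at bit 54), and the "Must match MCTargetDesc/HexagonBaseInfo.h" comment requires that the decoding side read them from the same positions. A minimal sketch of such accessors, assuming a raw 64-bit TSFlags word (the helper names here are hypothetical; the real masks live in HexagonBaseInfo.h):

#include <cstdint>

static bool hasNewValue2(uint64_t TSFlags)    { return (TSFlags >> 50) & 1u; }
static unsigned opNewValue2(uint64_t TSFlags) { return (TSFlags >> 51) & 0x7u; }
static bool isAccumulator(uint64_t TSFlags)   { return (TSFlags >> 54) & 1u; }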
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index db83ef6..2d1dea5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -21,8 +21,6 @@ def TypeMEMOP : IType<9>;
def TypeNV : IType<10>;
def TypeDUPLEX : IType<11>;
def TypeCOMPOUND : IType<12>;
-def TypeAG_VX : IType<28>;
-def TypeAG_VM : IType<29>;
def TypePREFIX : IType<30>;
// Duplex Instruction Class Declaration
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
new file mode 100644
index 0000000..f3d43de
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -0,0 +1,238 @@
+//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+// *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeCVI_VA : IType<13>;
+def TypeCVI_VA_DV : IType<14>;
+def TypeCVI_VX : IType<15>;
+def TypeCVI_VX_DV : IType<16>;
+def TypeCVI_VP : IType<17>;
+def TypeCVI_VP_VS : IType<18>;
+def TypeCVI_VS : IType<19>;
+def TypeCVI_VINLANESAT : IType<20>;
+def TypeCVI_VM_LD : IType<21>;
+def TypeCVI_VM_TMP_LD : IType<22>;
+def TypeCVI_VM_CUR_LD : IType<23>;
+def TypeCVI_VM_VP_LDU : IType<24>;
+def TypeCVI_VM_ST : IType<25>;
+def TypeCVI_VM_NEW_ST : IType<26>;
+def TypeCVI_VM_STU : IType<27>;
+def TypeCVI_HIST : IType<28>;
+//----------------------------------------------------------------------------//
+// Instruction Class Definitions +
+//----------------------------------------------------------------------------//
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_late<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_LATE>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV_SLOT2>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_EARLY>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_LONG>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VP_VS_LONG_EARLY>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VS>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VINLANESAT>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VS>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_TMP_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_TMP_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_CUR_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_VP_LDU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_VP_LDU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_NEW_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_NEW_ST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_STU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VM_STU>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_HIST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+ OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+}
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VA>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_VX_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+ Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource1<dag outs, dag ins, string asmstr,
+ list<dag> pattern = [], string cstr = "",
+ InstrItinClass itin = CVI_HIST>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+ Requires<[HasV60T, UseHVX]>;
+}
+
+
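
The CVI classes above are keyed off the IType codes 13 through 28 (TypeCVI_VA through TypeCVI_HIST, with 28 reusing the slot freed by dropping TypeAG_VX earlier in this diff). A quick way to tell an HVX/CVI instruction type from a scalar one is a simple range check; this is only an illustrative sketch, and the names are hypothetical rather than part of the patch:

#include <cstdint>

// IType codes taken from the V60 format file above.
static const unsigned TypeCVI_VA   = 13;  // first CVI type
static const unsigned TypeCVI_HIST = 28;  // last CVI type

static bool isCVIType(unsigned IType) {
  return IType >= TypeCVI_VA && IType <= TypeCVI_HIST;
}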
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 3cb0823..eb3590c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -23,9 +23,11 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cctype>
using namespace llvm;
@@ -36,9 +38,41 @@ using namespace llvm;
#include "HexagonGenInstrInfo.inc"
#include "HexagonGenDFAPacketizer.inc"
+using namespace llvm;
+
+cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
+ cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
+ "packetization boundary."));
+
+static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction",
+ cl::Hidden, cl::init(true), cl::desc("Enable branch prediction"));
+
+static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable schedule adjustment for new value stores."));
+
+static cl::opt<bool> EnableTimingClassLatency(
+ "enable-timing-class-latency", cl::Hidden, cl::init(false),
+ cl::desc("Enable timing class latency"));
+
+static cl::opt<bool> EnableALUForwarding(
+ "enable-alu-forwarding", cl::Hidden, cl::init(true),
+ cl::desc("Enable vec alu forwarding"));
+
+static cl::opt<bool> EnableACCForwarding(
+ "enable-acc-forwarding", cl::Hidden, cl::init(true),
+ cl::desc("Enable vec acc forwarding"));
+
+static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm"));
+
///
/// Constants for Hexagon instructions.
///
+const int Hexagon_MEMV_OFFSET_MAX_128B = 2047; // #s7
+const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7
+const int Hexagon_MEMV_OFFSET_MAX = 1023; // #s6
+const int Hexagon_MEMV_OFFSET_MIN = -1024; // #s6
const int Hexagon_MEMW_OFFSET_MAX = 4095;
const int Hexagon_MEMW_OFFSET_MIN = -4096;
const int Hexagon_MEMD_OFFSET_MAX = 8191;
@@ -57,71 +91,49 @@ const int Hexagon_MEMH_AUTOINC_MAX = 14;
const int Hexagon_MEMH_AUTOINC_MIN = -16;
const int Hexagon_MEMB_AUTOINC_MAX = 7;
const int Hexagon_MEMB_AUTOINC_MIN = -8;
+const int Hexagon_MEMV_AUTOINC_MAX = 192;
+const int Hexagon_MEMV_AUTOINC_MIN = -256;
+const int Hexagon_MEMV_AUTOINC_MAX_128B = 384;
+const int Hexagon_MEMV_AUTOINC_MIN_128B = -512;
// Pin the vtable to this file.
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
: HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
- RI(), Subtarget(ST) {}
+ RI() {}
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
+static bool isIntRegForSubInst(unsigned Reg) {
+ return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
+ (Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
+}
- switch (MI->getOpcode()) {
- default: break;
- case Hexagon::L2_loadri_io:
- case Hexagon::L2_loadrd_io:
- case Hexagon::L2_loadrh_io:
- case Hexagon::L2_loadrb_io:
- case Hexagon::L2_loadrub_io:
- if (MI->getOperand(2).isFI() &&
- MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
- FrameIndex = MI->getOperand(2).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
+
+static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
+ return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) &&
+ isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg));
}
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case Hexagon::S2_storeri_io:
- case Hexagon::S2_storerd_io:
- case Hexagon::S2_storerh_io:
- case Hexagon::S2_storerb_io:
- if (MI->getOperand(2).isFI() &&
- MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
- FrameIndex = MI->getOperand(0).getIndex();
- return MI->getOperand(2).getReg();
- }
- break;
+/// Calculate number of instructions excluding the debug instructions.
+static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
+ MachineBasicBlock::const_instr_iterator MIE) {
+ unsigned Count = 0;
+ for (; MIB != MIE; ++MIB) {
+ if (!MIB->isDebugValue())
+ ++Count;
}
- return 0;
+ return Count;
}
-// Find the hardware loop instruction used to set-up the specified loop.
-// On Hexagon, we have two instructions used to set-up the hardware loop
-// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
-// to indicate the end of a loop.
-static MachineInstr *
-findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
- SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+
+/// Find the hardware loop instruction used to set-up the specified loop.
+/// On Hexagon, we have two instructions used to set-up the hardware loop
+/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+/// to indicate the end of a loop.
+static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
int LOOPi;
int LOOPr;
if (EndLoopOp == Hexagon::ENDLOOP0) {
@@ -157,100 +169,108 @@ findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
return 0;
}
-unsigned HexagonInstrInfo::InsertBranch(
- MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
- Opcode_t BOpc = Hexagon::J2_jump;
- Opcode_t BccOpc = Hexagon::J2_jumpt;
+/// Gather register def/uses from MI.
+/// This treats possible (predicated) defs as actually happening ones
+/// (conservatively).
+static inline void parseOperands(const MachineInstr *MI,
+ SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) {
+ Defs.clear();
+ Uses.clear();
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
- // Check if ReverseBranchCondition has asked to reverse this branch
- // If we want to reverse the branch an odd number of times, we want
- // J2_jumpf.
- if (!Cond.empty() && Cond[0].isImm())
- BccOpc = Cond[0].getImm();
+ if (!MO.isReg())
+ continue;
- if (!FBB) {
- if (Cond.empty()) {
- // Due to a bug in TailMerging/CFG Optimization, we need to add a
- // special case handling of a predicated jump followed by an
- // unconditional jump. If not, Tail Merging and CFG Optimization go
- // into an infinite loop.
- MachineBasicBlock *NewTBB, *NewFBB;
- SmallVector<MachineOperand, 4> Cond;
- MachineInstr *Term = MBB.getFirstTerminator();
- if (Term != MBB.end() && isPredicated(Term) &&
- !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
- MachineBasicBlock *NextBB =
- std::next(MachineFunction::iterator(&MBB));
- if (NewTBB == NextBB) {
- ReverseBranchCondition(Cond);
- RemoveBranch(MBB);
- return InsertBranch(MBB, TBB, nullptr, Cond, DL);
- }
- }
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
- } else if (isEndLoopN(Cond[0].getImm())) {
- int EndLoopOp = Cond[0].getImm();
- assert(Cond[1].isMBB());
- // Since we're adding an ENDLOOP, there better be a LOOP instruction.
- // Check for it, and change the BB target if needed.
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
- assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
- Loop->getOperand(0).setMBB(TBB);
- // Add the ENDLOOP after the finding the LOOP0.
- BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
- } else if (isNewValueJump(Cond[0].getImm())) {
- assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
- // New value jump
- // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
- // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
- unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
- DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
- if (Cond[2].isReg()) {
- unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
- BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
- addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
- } else if(Cond[2].isImm()) {
- BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
- addImm(Cond[2].getImm()).addMBB(TBB);
- } else
- llvm_unreachable("Invalid condition for branching");
- } else {
- assert((Cond.size() == 2) && "Malformed cond vector");
- const MachineOperand &RO = Cond[1];
- unsigned Flags = getUndefRegState(RO.isUndef());
- BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
- }
- return 1;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (MO.isUse())
+ Uses.push_back(MO.getReg());
+
+ if (MO.isDef())
+ Defs.push_back(MO.getReg());
}
- assert((!Cond.empty()) &&
- "Cond. cannot be empty when multiple branchings are required");
- assert((!isNewValueJump(Cond[0].getImm())) &&
- "NV-jump cannot be inserted with another branch");
- // Special case for hardware loops. The condition is a basic block.
- if (isEndLoopN(Cond[0].getImm())) {
- int EndLoopOp = Cond[0].getImm();
- assert(Cond[1].isMBB());
- // Since we're adding an ENDLOOP, there better be a LOOP instruction.
- // Check for it, and change the BB target if needed.
- SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
- assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
- Loop->getOperand(0).setMBB(TBB);
- // Add the ENDLOOP after the finding the LOOP0.
- BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
- } else {
- const MachineOperand &RO = Cond[1];
- unsigned Flags = getUndefRegState(RO.isUndef());
- BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+}
+
+
+// Position dependent, so check twice for swap.
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
+ switch (Ga) {
+ case HexagonII::HSIG_None:
+ default:
+ return false;
+ case HexagonII::HSIG_L1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_L2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S1:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_S2:
+ return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+ Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 ||
+ Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_A:
+ return (Gb == HexagonII::HSIG_A);
+ case HexagonII::HSIG_Compound:
+ return (Gb == HexagonII::HSIG_Compound);
}
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return false;
+}
- return 2;
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::L2_loadri_io:
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadrb_io:
+ case Hexagon::L2_loadrub_io:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(0).getIndex();
+ return MI->getOperand(2).getReg();
+ }
+ break;
+ }
+ return 0;
}
@@ -269,9 +289,6 @@ unsigned HexagonInstrInfo::InsertBranch(
/// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
/// Cond[1] = R
/// Cond[2] = Imm
-/// @note Related function is \fn findInstrPredicate which fills in
-/// Cond. vector when a predicated instruction is passed to it.
-/// We follow same protocol in that case too.
///
bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -314,7 +331,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
-
+
bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
I->getOperand(0).isMBB();
// Delete the J2_jump if it's equivalent to a fall-through.
@@ -327,17 +344,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
- if (!isUnpredicatedTerminator(I))
+ if (!isUnpredicatedTerminator(&*I))
return false;
// Get the last instruction in the block.
- MachineInstr *LastInst = I;
+ MachineInstr *LastInst = &*I;
MachineInstr *SecondLastInst = nullptr;
// Find one more terminator if present.
- do {
- if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
+ for (;;) {
+ if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
if (!SecondLastInst)
- SecondLastInst = I;
+ SecondLastInst = &*I;
else
// This is a third branch.
return true;
@@ -345,7 +362,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
if (I == MBB.instr_begin())
break;
--I;
- } while(I);
+ }
int LastOpcode = LastInst->getOpcode();
int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
@@ -418,7 +435,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// executed, so remove it.
if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
+ I = LastInst->getIterator();
if (AllowModify)
I->eraseFromParent();
return false;
@@ -438,6 +455,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
}
+
unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
MachineBasicBlock::iterator I = MBB.end();
@@ -458,100 +476,127 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return Count;
}
-/// \brief For a comparison instruction, return the source registers in
-/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
-/// compares against in CmpValue. Return true if the comparison instruction
-/// can be analyzed.
-bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const {
- unsigned Opc = MI->getOpcode();
- // Set mask and the first source register.
- switch (Opc) {
- case Hexagon::C2_cmpeq:
- case Hexagon::C2_cmpeqp:
- case Hexagon::C2_cmpgt:
- case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgtup:
- case Hexagon::C4_cmpneq:
- case Hexagon::C4_cmplte:
- case Hexagon::C4_cmplteu:
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgtui:
- case Hexagon::C4_cmpneqi:
- case Hexagon::C4_cmplteui:
- case Hexagon::C4_cmpltei:
- SrcReg = MI->getOperand(1).getReg();
- Mask = ~0;
- break;
- case Hexagon::A4_cmpbeq:
- case Hexagon::A4_cmpbgt:
- case Hexagon::A4_cmpbgtu:
- case Hexagon::A4_cmpbeqi:
- case Hexagon::A4_cmpbgti:
- case Hexagon::A4_cmpbgtui:
- SrcReg = MI->getOperand(1).getReg();
- Mask = 0xFF;
- break;
- case Hexagon::A4_cmpheq:
- case Hexagon::A4_cmphgt:
- case Hexagon::A4_cmphgtu:
- case Hexagon::A4_cmpheqi:
- case Hexagon::A4_cmphgti:
- case Hexagon::A4_cmphgtui:
- SrcReg = MI->getOperand(1).getReg();
- Mask = 0xFFFF;
- break;
- }
+unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
+ unsigned BOpc = Hexagon::J2_jump;
+ unsigned BccOpc = Hexagon::J2_jumpt;
+ assert(validateBranchCond(Cond) && "Invalid branching condition");
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- // Set the value/second source register.
- switch (Opc) {
- case Hexagon::C2_cmpeq:
- case Hexagon::C2_cmpeqp:
- case Hexagon::C2_cmpgt:
- case Hexagon::C2_cmpgtp:
- case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgtup:
- case Hexagon::A4_cmpbeq:
- case Hexagon::A4_cmpbgt:
- case Hexagon::A4_cmpbgtu:
- case Hexagon::A4_cmpheq:
- case Hexagon::A4_cmphgt:
- case Hexagon::A4_cmphgtu:
- case Hexagon::C4_cmpneq:
- case Hexagon::C4_cmplte:
- case Hexagon::C4_cmplteu:
- SrcReg2 = MI->getOperand(2).getReg();
- return true;
+ // Check if ReverseBranchCondition has asked to reverse this branch
+ // If we want to reverse the branch an odd number of times, we want
+ // J2_jumpf.
+ if (!Cond.empty() && Cond[0].isImm())
+ BccOpc = Cond[0].getImm();
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpgtui:
- case Hexagon::C2_cmpgti:
- case Hexagon::C4_cmpneqi:
- case Hexagon::C4_cmplteui:
- case Hexagon::C4_cmpltei:
- case Hexagon::A4_cmpbeqi:
- case Hexagon::A4_cmpbgti:
- case Hexagon::A4_cmpbgtui:
- case Hexagon::A4_cmpheqi:
- case Hexagon::A4_cmphgti:
- case Hexagon::A4_cmphgtui:
- SrcReg2 = 0;
- Value = MI->getOperand(2).getImm();
- return true;
+ if (!FBB) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need to add a
+ // special case handling of a predicated jump followed by an
+ // unconditional jump. If not, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (Term != MBB.end() && isPredicated(Term) &&
+ !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+ MachineBasicBlock *NextBB = &*++MBB.getIterator();
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, nullptr, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+ // Add the ENDLOOP after the finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else if (isNewValueJump(Cond[0].getImm())) {
+ assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+ // New value jump
+ // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+ // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+ unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+ DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
+ if (Cond[2].isReg()) {
+ unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+ } else if(Cond[2].isImm()) {
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+ addImm(Cond[2].getImm()).addMBB(TBB);
+ } else
+ llvm_unreachable("Invalid condition for branching");
+ } else {
+ assert((Cond.size() == 2) && "Malformed cond vector");
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ return 1;
}
+ assert((!Cond.empty()) &&
+ "Cond. cannot be empty when multiple branchings are required");
+ assert((!isNewValueJump(Cond[0].getImm())) &&
+ "NV-jump cannot be inserted with another branch");
+ // Special case for hardware loops. The condition is a basic block.
+ if (isEndLoopN(Cond[0].getImm())) {
+ int EndLoopOp = Cond[0].getImm();
+ assert(Cond[1].isMBB());
+ // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+ // Check for it, and change the BB target if needed.
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+ Loop->getOperand(0).setMBB(TBB);
+ // Add the ENDLOOP after the finding the LOOP0.
+ BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+ } else {
+ const MachineOperand &RO = Cond[1];
+ unsigned Flags = getUndefRegState(RO.isUndef());
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
- return false;
+ return 2;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ BranchProbability Probability) const {
+ return nonDbgBBSize(&MBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability)
+ const {
+ return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs, BranchProbability Probability) const {
+ return NumInstrs <= 4;
}
void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+ MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
+ auto &HRI = getRegisterInfo();
if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg);
return;
@@ -599,28 +644,74 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
addReg(SrcReg, getKillRegState(KillSrc));
return;
}
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ Hexagon::IntRegsRegClass.contains(DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+ getKillRegState(KillSrc)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+ getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg).
+ addReg(SrcReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(SrcReg) &&
+ Hexagon::VectorRegsRegClass.contains(DestReg)) {
+ llvm_unreachable("Unimplemented pred to vec");
+ return;
+ }
+ if (Hexagon::VecPredRegsRegClass.contains(DestReg) &&
+ Hexagon::VectorRegsRegClass.contains(SrcReg)) {
+ llvm_unreachable("Unimplemented vec to pred");
+ return;
+ }
+ if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+ HRI.getSubReg(DestReg, Hexagon::subreg_hireg)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+ getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+ HRI.getSubReg(DestReg, Hexagon::subreg_loreg)).
+ addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+ getKillRegState(KillSrc));
+ return;
+ }
+#ifndef NDEBUG
+ // Show the invalid registers to ease debugging.
+ dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber()
+ << ": " << PrintReg(DestReg, &HRI)
+ << " = " << PrintReg(SrcReg, &HRI) << '\n';
+#endif
llvm_unreachable("Unimplemented");
}
-void HexagonInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
-
+void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), Align);
if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
@@ -640,33 +731,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
-void HexagonInstrInfo::storeRegToAddr(
- MachineFunction &MF, unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
- llvm_unreachable("Unimplemented");
-}
-
-
-void HexagonInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, unsigned DestReg, int FI,
+ const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- Align);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), Align);
if (RC == &Hexagon::IntRegsRegClass) {
BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
@@ -682,27 +757,136 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
}
-void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- llvm_unreachable("Unimplemented");
-}
-bool
-HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- const HexagonRegisterInfo &TRI = getRegisterInfo();
+/// expandPostRAPseudo - This function is called for all pseudo instructions
+/// that remain after register allocation. Many pseudo instructions are
+/// created to help register allocation. This is the place to convert them
+/// into real instructions. The target can edit MI in place, or it can insert
+/// new instructions and erase MI. The function should return true if
+/// anything was changed.
+bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI)
+ const {
+ const HexagonRegisterInfo &HRI = getRegisterInfo();
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned Opc = MI->getOpcode();
+ const unsigned VecOffset = 1;
+ bool Is128B = false;
switch (Opc) {
case Hexagon::ALIGNA:
BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
- .addReg(TRI.getFrameRegister())
+ .addReg(HRI.getFrameRegister())
.addImm(-MI->getOperand(1).getImm());
MBB.erase(MI);
return true;
+ case Hexagon::HEXAGON_V6_vassignp_128B:
+ case Hexagon::HEXAGON_V6_vassignp: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (SrcReg != DstReg)
+ copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::HEXAGON_V6_lo_128B:
+ case Hexagon::HEXAGON_V6_lo: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+ copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ MRI.clearKillFlags(SrcSubLo);
+ return true;
+ }
+ case Hexagon::HEXAGON_V6_hi_128B:
+ case Hexagon::HEXAGON_V6_hi: {
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+ copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill());
+ MBB.erase(MI);
+ MRI.clearKillFlags(SrcSubHi);
+ return true;
+ }
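+  // A vector register-pair store pseudo is split into two single-vector
+  // stores below, e.g. (illustrative, with W0 = V1:V0 in 64-byte mode):
+  //   STrivv_indexed %R29, 0, %W0
+  // becomes
+  //   V6_vS32b_ai %R29, 0, %V0    // low half
+  //   V6_vS32b_ai %R29, 64, %V1   // high half (offset 128 for the _128B form)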
+ case Hexagon::STrivv_indexed_128B:
+ Is128B = true;
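+    // Fall through.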
+ case Hexagon::STrivv_indexed: {
+ unsigned SrcReg = MI->getOperand(2).getReg();
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+ unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B
+ : Hexagon::V6_vS32b_ai;
+ unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd))
+ .addOperand(MI->getOperand(0))
+ .addImm(MI->getOperand(1).getImm())
+ .addReg(SrcSubLo)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MI1New->getOperand(0).setIsKill(false);
+ BuildMI(MBB, MI, DL, get(NewOpcd))
+ .addOperand(MI->getOperand(0))
+ // The Vectors are indexed in multiples of vector size.
+ .addImm(MI->getOperand(1).getImm()+Offset)
+ .addReg(SrcSubHi)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::LDrivv_pseudo_V6_128B:
+ case Hexagon::LDrivv_indexed_128B:
+ Is128B = true;
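+    // Fall through.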
+ case Hexagon::LDrivv_pseudo_V6:
+ case Hexagon::LDrivv_indexed: {
+ unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B
+ : Hexagon::V6_vL32b_ai;
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+ MachineInstr *MI1New =
+ BuildMI(MBB, MI, DL, get(NewOpcd),
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+ .addOperand(MI->getOperand(1))
+ .addImm(MI->getOperand(2).getImm());
+ MI1New->getOperand(1).setIsKill(false);
+ BuildMI(MBB, MI, DL, get(NewOpcd),
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+ .addOperand(MI->getOperand(1))
+ // The Vectors are indexed in multiples of vector size.
+ .addImm(MI->getOperand(2).getImm() + Offset)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::LDriv_pseudo_V6_128B:
+ Is128B = true;
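+    // Fall through.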
+ case Hexagon::LDriv_pseudo_V6: {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
+ : Hexagon::V6_vL32b_ai;
+ int32_t Off = MI->getOperand(2).getImm();
+ int32_t Idx = Off;
+ BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
+ .addOperand(MI->getOperand(1))
+ .addImm(Idx)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::STriv_pseudo_V6_128B:
+ Is128B = true;
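+    // Fall through.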
+ case Hexagon::STriv_pseudo_V6: {
+ unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
+ : Hexagon::V6_vS32b_ai;
+ int32_t Off = MI->getOperand(1).getImm();
+ int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6);
+ BuildMI(MBB, MI, DL, get(NewOpc))
+ .addOperand(MI->getOperand(0))
+ .addImm(Idx)
+ .addOperand(MI->getOperand(2))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ MBB.erase(MI);
+ return true;
+ }
case Hexagon::TFR_PdTrue: {
unsigned Reg = MI->getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
@@ -724,15 +908,15 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
unsigned DstReg = MI->getOperand(0).getReg();
unsigned Src1Reg = MI->getOperand(1).getReg();
unsigned Src2Reg = MI->getOperand(2).getReg();
- unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
- unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
- unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
- unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
- TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
.addReg(Src2SubHi);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
- TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
.addReg(Src2SubLo);
MBB.erase(MI);
MRI.clearKillFlags(Src1SubHi);
@@ -747,17 +931,17 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
unsigned Src1Reg = MI->getOperand(1).getReg();
unsigned Src2Reg = MI->getOperand(2).getReg();
unsigned Src3Reg = MI->getOperand(3).getReg();
- unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
- unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
- unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
- unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
- unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
- unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+ unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
- TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+ HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
.addReg(Src2SubHi).addReg(Src3SubHi);
BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
- TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+ HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
.addReg(Src2SubLo).addReg(Src3SubLo);
MBB.erase(MI);
MRI.clearKillFlags(Src1SubHi);
@@ -768,104 +952,168 @@ HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MRI.clearKillFlags(Src3SubLo);
return true;
}
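+  // MUX64_rr expands a 64-bit mux, Rdd = mux(Pu, Rss, Rtt), into up to two
+  // predicated register-pair transfers: "if (Pu) Rdd = Rss" and
+  // "if (!Pu) Rdd = Rtt"; a transfer is skipped when its source is already
+  // the destination.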
+ case Hexagon::MUX64_rr: {
+ const MachineOperand &Op0 = MI->getOperand(0);
+ const MachineOperand &Op1 = MI->getOperand(1);
+ const MachineOperand &Op2 = MI->getOperand(2);
+ const MachineOperand &Op3 = MI->getOperand(3);
+ unsigned Rd = Op0.getReg();
+ unsigned Pu = Op1.getReg();
+ unsigned Rs = Op2.getReg();
+ unsigned Rt = Op3.getReg();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned K1 = getKillRegState(Op1.isKill());
+ unsigned K2 = getKillRegState(Op2.isKill());
+ unsigned K3 = getKillRegState(Op3.isKill());
+ if (Rd != Rs)
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd)
+ .addReg(Pu, (Rd == Rt) ? K1 : 0)
+ .addReg(Rs, K2);
+ if (Rd != Rt)
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd)
+ .addReg(Pu, K1)
+ .addReg(Rt, K3);
+ MBB.erase(MI);
+ return true;
+ }
case Hexagon::TCRETURNi:
MI->setDesc(get(Hexagon::J2_jump));
return true;
case Hexagon::TCRETURNr:
MI->setDesc(get(Hexagon::J2_jumpr));
return true;
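+  // Floating-point immediate transfers are rewritten as integer transfers of
+  // the bit pattern, e.g. (illustrative) "TFRI_f #1.0" becomes
+  // "A2_tfrsi #0x3f800000"; the predicated forms map to C2_cmoveit/C2_cmoveif.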
+ case Hexagon::TFRI_f:
+ case Hexagon::TFRI_cPt_f:
+ case Hexagon::TFRI_cNotPt_f: {
+ unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2;
+ APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF();
+ APInt IVal = FVal.bitcastToAPInt();
+ MI->RemoveOperand(Opx);
+ unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi :
+ (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit :
+ Hexagon::C2_cmoveif;
+ MI->setDesc(get(NewOpc));
+ MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue()));
+ return true;
+ }
}
return false;
}
-MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(
- MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt, int FI) const {
- // Hexagon_TODO: Implement.
- return nullptr;
+
+// We indicate that we want to reverse the branch by
+// inserting the reversed branching opcode.
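+// For example (illustrative), J2_jumpt becomes J2_jumpf and vice versa;
+// ENDLOOP branches cannot be reversed, so we return true (failure) for them.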
+bool HexagonInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return true;
+ assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+ unsigned opcode = Cond[0].getImm();
+ assert(get(opcode).isBranch() && "Should be a branching condition.");
+ if (isEndLoopN(opcode))
+ return true;
+ unsigned NewOpcode = getInvertedPredicatedOpcode(opcode);
+ Cond[0].setImm(NewOpcode);
+ return false;
}
-unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetRegisterClass *TRC;
- if (VT == MVT::i1) {
- TRC = &Hexagon::PredRegsRegClass;
- } else if (VT == MVT::i32 || VT == MVT::f32) {
- TRC = &Hexagon::IntRegsRegClass;
- } else if (VT == MVT::i64 || VT == MVT::f64) {
- TRC = &Hexagon::DoubleRegsRegClass;
- } else {
- llvm_unreachable("Cannot handle this register class");
- }
+void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_nop));
+}
- unsigned NewReg = RegInfo.createVirtualRegister(TRC);
- return NewReg;
+
+// Returns true if an instruction is predicated irrespective of the predicate
+// sense. For example, all of the following will return true.
+// if (p0) R1 = add(R2, R3)
+// if (!p0) R1 = add(R2, R3)
+// if (p0.new) R1 = add(R2, R3)
+// if (!p0.new) R1 = add(R2, R3)
+// Note: New-value stores are not included here because, in the current
+// implementation, we do not need to check their predicate sense.
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
}
-bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- const MCInstrDesc &MID = MI->getDesc();
- const uint64_t F = MID.TSFlags;
- if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
- return true;
- // TODO: This is largely obsolete now. Will need to be removed
- // in consecutive patches.
- switch(MI->getOpcode()) {
- // TFR_FI Remains a special case.
- case Hexagon::TFR_FI:
- return true;
- default:
- return false;
+bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI,
+ ArrayRef<MachineOperand> Cond) const {
+ if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
+ isEndLoopN(Cond[0].getImm())) {
+ DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+ return false;
}
- return false;
-}
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = predOpcodeHasNot(Cond);
-// This returns true in two cases:
-// - The OP code itself indicates that this is an extended instruction.
-// - One of MOs has been marked with HMOTF_ConstExtended flag.
-bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
- // First check if this is permanently extended op code.
- const uint64_t F = MI->getDesc().TSFlags;
- if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
- return true;
- // Use MO operand flags to determine if one of MI's operands
- // has HMOTF_ConstExtended flag set.
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
- if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended)
- return true;
+ // We have to predicate MI "in place", i.e. after this function returns,
+ // MI will need to be transformed into a predicated form. To avoid com-
+ // plicated manipulations with the operands (handling tied operands,
+ // etc.), build a new temporary instruction, then overwrite MI with it.
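+  // For example (illustrative): "%R1 = A2_add %R2, %R3" predicated on
+  // Cond = {J2_jumpt, %P0} becomes "%R1 = A2_paddt %P0, %R2, %R3".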
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned PredOpc = getCondOpcode(Opc, invertJump);
+ MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
+ unsigned NOp = 0, NumOps = MI->getNumOperands();
+ while (NOp < NumOps) {
+ MachineOperand &Op = MI->getOperand(NOp);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ break;
+ T.addOperand(Op);
+ NOp++;
}
- return false;
-}
-bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const {
- return MI->getDesc().isBranch();
-}
+ unsigned PredReg, PredRegPos, PredRegFlags;
+ bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
+ (void)GotPredReg;
+ assert(GotPredReg);
+ T.addReg(PredReg, PredRegFlags);
+ while (NOp < NumOps)
+ T.addOperand(MI->getOperand(NOp++));
-bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
- if (isNewValueJump(MI))
- return true;
+ MI->setDesc(get(PredOpc));
+ while (unsigned n = MI->getNumOperands())
+ MI->RemoveOperand(n-1);
+ for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
+ MI->addOperand(T->getOperand(i));
- if (isNewValueStore(MI))
- return true;
+ MachineBasicBlock::instr_iterator TI = T->getIterator();
+ B.erase(TI);
- return false;
+ MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+ MRI.clearKillFlags(PredReg);
+ return true;
}
-bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
-}
-bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const {
- const uint64_t F = get(Opcode).TSFlags;
- return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
+bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const {
+ // TODO: Fix this
+ return false;
}
-bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
- return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
+
+bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ auto &HRI = getRegisterInfo();
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == &Hexagon::PredRegsRegClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
}
bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
@@ -875,10 +1123,21 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
return false;
const int Opc = MI->getOpcode();
+ int NumOperands = MI->getNumOperands();
+
+  // Keep a flag for up to 4 operands in the instruction, to indicate if
+  // that operand has been constant-extended.
+ bool OpCExtended[4];
+ if (NumOperands > 4)
+ NumOperands = 4;
+
+ for (int i = 0; i < NumOperands; i++)
+ OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI));
switch(Opc) {
case Hexagon::A2_tfrsi:
- return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm());
+ return (isOperandExtended(MI, 1) && isConstExtended(MI)) ||
+ isInt<12>(MI->getOperand(1).getImm());
case Hexagon::S2_storerd_io:
return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
@@ -926,8 +1185,8 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
case Hexagon::S4_storeirb_io:
case Hexagon::S4_storeirh_io:
case Hexagon::S4_storeiri_io:
- return (isUInt<6>(MI->getOperand(1).getImm()) &&
- isInt<6>(MI->getOperand(2).getImm()));
+ return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) &&
+ (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm()));
case Hexagon::A2_addi:
return isInt<8>(MI->getOperand(2).getImm());
@@ -944,269 +1203,1117 @@ bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
return true;
}
-// This function performs the following inversiones:
-//
-// cPt ---> cNotPt
-// cNotPt ---> cPt
-//
-unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
- int InvPredOpcode;
- InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
- : Hexagon::getTruePredOpcode(Opc);
- if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
- return InvPredOpcode;
- switch(Opc) {
- default: llvm_unreachable("Unexpected predicated instruction");
- case Hexagon::C2_ccombinewt:
- return Hexagon::C2_ccombinewf;
+bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB, const MachineFunction &MF) const {
+  // Debug info is never a scheduling boundary. Be explicit about this so that
+  // a dbg_value is not treated as a scheduling hazard; the boundary should be
+  // the actual instruction preceding the dbg_value instruction(s), just like
+  // it is when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+  // A throwing call is a scheduling boundary.
+ if (MI->isCall()) {
+ // If any of the block's successors is a landing pad, this could be a
+ // throwing call.
+ for (auto I : MBB->successors())
+ if (I->isEHPad())
+ return true;
+ }
+
+  // Don't mess around with no-return calls.
+ if (MI->getOpcode() == Hexagon::CALLv3nr)
+ return true;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isPosition())
+ return true;
+
+ if (MI->isInlineAsm() && !ScheduleInlineAsm)
+ return true;
+
+ return false;
+}
+
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+/// Hexagon counts the number of ##'s and adjusts for that many
+/// constant extenders.
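+/// For example (illustrative), an asm string containing two instructions and
+/// one "##" is sized as 2 * MaxInstLength + 4 bytes.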
+unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const {
+ StringRef AStr(Str);
+ // Count the number of instructions in the asm.
+ bool atInsnStart = true;
+ unsigned Length = 0;
+ for (; *Str; ++Str) {
+ if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+ strlen(MAI.getSeparatorString())) == 0)
+ atInsnStart = true;
+ if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+ Length += MAI.getMaxInstLength();
+ atInsnStart = false;
+ }
+ if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+ strlen(MAI.getCommentString())) == 0)
+ atInsnStart = false;
+ }
+
+ // Add to size number of constant extenders seen * 4.
+ StringRef Occ("##");
+ Length += AStr.count(Occ)*4;
+ return Length;
+}
+
+
+ScheduleHazardRecognizer*
+HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+ return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+
+/// \brief For a comparison instruction, return the source registers in
+/// \p SrcReg and \p SrcReg2 if it has two register operands, and the value it
+/// compares against in \p Value. Return true if the comparison instruction
+/// can be analyzed.
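+/// For example (illustrative), "p0 = cmp.gtu(r1, #10)" (C2_cmpgtui) yields
+/// SrcReg = R1, SrcReg2 = 0, Value = 10 and Mask = ~0.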
+bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ // Set mask and the first source register.
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = ~0;
+ break;
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFF;
+ break;
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg = MI->getOperand(1).getReg();
+ Mask = 0xFFFF;
+ break;
+ }
+
+ // Set the value/second source register.
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::A4_cmphgtui:
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+
+unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI, unsigned *PredCost) const {
+ return getInstrTimingClassLatency(ItinData, MI);
+}
+
+
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+ const TargetSubtargetInfo &STI) const {
+ const InstrItineraryData *II = STI.getInstrItineraryData();
+ return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II);
+}
+
+
+// Inspired by this pair:
+// %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
+// S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
+// Currently AA considers the addresses in these instructions to be aliasing.
+bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+ MachineInstr *MIb, AliasAnalysis *AA) const {
+ int OffsetA = 0, OffsetB = 0;
+ unsigned SizeA = 0, SizeB = 0;
+
+ if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+ return false;
+
+  // Instructions that are pure loads (as opposed to load-and-store memops)
+  // are never dependent on each other.
+ if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb))
+ return true;
+
+ // Get base, offset, and access size in MIa.
+ unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
+ if (!BaseRegA || !SizeA)
+ return false;
+
+ // Get base, offset, and access size in MIb.
+ unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
+ if (!BaseRegB || !SizeB)
+ return false;
+
+ if (BaseRegA != BaseRegB)
+ return false;
+
+ // This is a mem access with the same base register and known offsets from it.
+ // Reason about it.
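+  // E.g. (from the pair above) the ST4 at R29+132 and the LD4 at R29+136 are
+  // disjoint because the offset difference (4) is not smaller than the size
+  // of the lower access (4).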
+ if (OffsetA > OffsetB) {
+ uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB);
+ return (SizeB <= offDiff);
+ } else if (OffsetA < OffsetB) {
+ uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA);
+ return (SizeA <= offDiff);
+ }
+
+ return false;
+}
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const TargetRegisterClass *TRC;
+ if (VT == MVT::i1) {
+ TRC = &Hexagon::PredRegsRegClass;
+ } else if (VT == MVT::i32 || VT == MVT::f32) {
+ TRC = &Hexagon::IntRegsRegClass;
+ } else if (VT == MVT::i64 || VT == MVT::f64) {
+ TRC = &Hexagon::DoubleRegsRegClass;
+ } else {
+ llvm_unreachable("Cannot handle this register class");
+ }
+
+ unsigned NewReg = MRI.createVirtualRegister(TRC);
+ return NewReg;
+}
+
+
+bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const {
+ return (getAddrMode(MI) == HexagonII::AbsoluteSet);
+}
+
+
+bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
+}
+
+
+bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ if (!(isTC1(MI))
+ && !(QII->isTC2Early(MI))
+ && !(MI->getDesc().mayLoad())
+ && !(MI->getDesc().mayStore())
+ && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe)
+ && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe)
+ && !(QII->isMemOp(MI))
+ && !(MI->isBranch())
+ && !(MI->isReturn())
+ && !MI->isCall())
+ return true;
+
+ return false;
+}
+
+
+// Return true if the instruction is a compound branch instruction.
+bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const {
+ return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch());
+}
+
+
+bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const {
+ return (MI->isBranch() && isPredicated(MI)) ||
+ isConditionalTransfer(MI) ||
+ isConditionalALU32(MI) ||
+ isConditionalLoad(MI) ||
+ // Predicated stores which don't have a .new on any operands.
+ (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) &&
+ !isPredicatedNew(MI));
+}
+
+
+bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::A2_paddf:
+ case Hexagon::A2_paddfnew:
+ case Hexagon::A2_paddif:
+ case Hexagon::A2_paddifnew:
+ case Hexagon::A2_paddit:
+ case Hexagon::A2_padditnew:
+ case Hexagon::A2_paddt:
+ case Hexagon::A2_paddtnew:
+ case Hexagon::A2_pandf:
+ case Hexagon::A2_pandfnew:
+ case Hexagon::A2_pandt:
+ case Hexagon::A2_pandtnew:
+ case Hexagon::A2_porf:
+ case Hexagon::A2_porfnew:
+ case Hexagon::A2_port:
+ case Hexagon::A2_portnew:
+ case Hexagon::A2_psubf:
+ case Hexagon::A2_psubfnew:
+ case Hexagon::A2_psubt:
+ case Hexagon::A2_psubtnew:
+ case Hexagon::A2_pxorf:
+ case Hexagon::A2_pxorfnew:
+ case Hexagon::A2_pxort:
+ case Hexagon::A2_pxortnew:
+ case Hexagon::A4_paslhf:
+ case Hexagon::A4_paslhfnew:
+ case Hexagon::A4_paslht:
+ case Hexagon::A4_paslhtnew:
+ case Hexagon::A4_pasrhf:
+ case Hexagon::A4_pasrhfnew:
+ case Hexagon::A4_pasrht:
+ case Hexagon::A4_pasrhtnew:
+ case Hexagon::A4_psxtbf:
+ case Hexagon::A4_psxtbfnew:
+ case Hexagon::A4_psxtbt:
+ case Hexagon::A4_psxtbtnew:
+ case Hexagon::A4_psxthf:
+ case Hexagon::A4_psxthfnew:
+ case Hexagon::A4_psxtht:
+ case Hexagon::A4_psxthtnew:
+ case Hexagon::A4_pzxtbf:
+ case Hexagon::A4_pzxtbfnew:
+ case Hexagon::A4_pzxtbt:
+ case Hexagon::A4_pzxtbtnew:
+ case Hexagon::A4_pzxthf:
+ case Hexagon::A4_pzxthfnew:
+ case Hexagon::A4_pzxtht:
+ case Hexagon::A4_pzxthtnew:
case Hexagon::C2_ccombinewf:
- return Hexagon::C2_ccombinewt;
+ case Hexagon::C2_ccombinewt:
+ return true;
+ }
+ return false;
+}
+
+
+// FIXME - Function name and its functionality don't match.
+// It should be renamed to hasPredNewOpcode()
+bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const {
+ if (!MI->getDesc().mayLoad() || !isPredicated(MI))
+ return false;
+
+ int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+ // Instruction with valid predicated-new opcode can be promoted to .new.
+ return PNewOpcode >= 0;
+}
+
- // Dealloc_return.
- case Hexagon::L4_return_t:
- return Hexagon::L4_return_f;
- case Hexagon::L4_return_f:
- return Hexagon::L4_return_t;
+// Returns true if an instruction is a conditional store.
+//
+// Note: It doesn't include conditional new-value stores, as they cannot be
+// converted to the .new predicated form.
+bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ case Hexagon::S4_pstorerbt_rr:
+ case Hexagon::S4_pstorerbf_rr:
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S2_pstorerbt_pi:
+ case Hexagon::S2_pstorerbf_pi:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io:
+ case Hexagon::S4_pstorerdt_rr:
+ case Hexagon::S4_pstorerdf_rr:
+ case Hexagon::S2_pstorerdt_pi:
+ case Hexagon::S2_pstorerdf_pi:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ case Hexagon::S4_pstorerht_rr:
+ case Hexagon::S4_pstorerhf_rr:
+ case Hexagon::S2_pstorerht_pi:
+ case Hexagon::S2_pstorerhf_pi:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ case Hexagon::S4_pstorerit_rr:
+ case Hexagon::S4_pstorerif_rr:
+ case Hexagon::S2_pstorerit_pi:
+ case Hexagon::S2_pstorerif_pi:
+
+ // V4 global address store before promoting to dot new.
+ case Hexagon::S4_pstorerdt_abs:
+ case Hexagon::S4_pstorerdf_abs:
+ case Hexagon::S4_pstorerbt_abs:
+ case Hexagon::S4_pstorerbf_abs:
+ case Hexagon::S4_pstorerht_abs:
+ case Hexagon::S4_pstorerhf_abs:
+ case Hexagon::S4_pstorerit_abs:
+ case Hexagon::S4_pstorerif_abs:
+ return true;
+
+  // Predicated new-value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+  // from the "conditional store" list because a predicated new-value store
+  // would NOT be promoted to a double dot-new store.
+  // This function returns true for stores that are predicated but not yet
+  // promoted to predicated dot-new instructions.
}
}
-// New Value Store instructions.
-bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+
+bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrf:
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::A2_tfrtnew:
+ case Hexagon::A2_tfrfnew:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmovenewif:
+ case Hexagon::A2_tfrpt:
+ case Hexagon::A2_tfrpf:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
+// isFPImm and later getFPImm as well.
+bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
+ unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
+ if (isExtended) // Instruction must be extended.
+ return true;
+
+ unsigned isExtendable =
+ (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+ if (!isExtendable)
+ return false;
+
+ if (MI->isCall())
+ return false;
+
+ short ExtOpNum = getCExtOpNum(MI);
+ const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // Use MO operand flags to determine if MO
+ // has the HMOTF_ConstExtended flag set.
+  if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+  // A basic-block address operand that is not marked as extended above is
+  // never constant-extended.
+ if (MO.isMBB())
+ return false;
+
+ // We could be using an instruction with an extendable immediate and shoehorn
+ // a global address into it. If it is a global address it will be constant
+ // extended. We do this for COMBINE.
+  // The same applies to the other non-immediate operand kinds checked below:
+  // symbols, block addresses, jump-table and constant-pool indices.
+ if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
+ MO.isJTI() || MO.isCPI())
+ return true;
- return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
+ // If the extendable operand is not 'Immediate' type, the instruction should
+ // have 'isExtended' flag set.
+ assert(MO.isImm() && "Extendable operand must be Immediate type");
+
+ int MinValue = getMinValue(MI);
+ int MaxValue = getMaxValue(MI);
+ int ImmValue = MO.getImm();
+
+ return (ImmValue < MinValue || ImmValue > MaxValue);
}
-bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ case Hexagon::L4_return_fnew_pt :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
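+// Sub-registers are taken into account, e.g. (illustrative) a producer that
+// defines the pair D0 feeds a consumer that reads R0 or R1.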
+bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ const MCInstrDesc &ProdMCID = ProdMI->getDesc();
+ if (!ProdMCID.getNumDefs())
+ return false;
+
+ auto &HRI = getRegisterInfo();
+
+ SmallVector<unsigned, 4> DefsA;
+ SmallVector<unsigned, 4> DefsB;
+ SmallVector<unsigned, 8> UsesA;
+ SmallVector<unsigned, 8> UsesB;
+
+ parseOperands(ProdMI, DefsA, UsesA);
+ parseOperands(ConsMI, DefsB, UsesB);
+
+ for (auto &RegA : DefsA)
+ for (auto &RegB : UsesB) {
+ // True data dependency.
+ if (RegA == RegB)
+ return true;
+
+ if (Hexagon::DoubleRegsRegClass.contains(RegA))
+ for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
+ if (RegB == *SubRegs)
+ return true;
+
+ if (Hexagon::DoubleRegsRegClass.contains(RegB))
+ for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
+ if (RegA == *SubRegs)
+ return true;
+ }
+
+ return false;
+}
+
+
+// Returns true if the instruction is already a .cur instruction.
+bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::V6_vL32b_cur_pi:
+ case Hexagon::V6_vL32b_cur_ai:
+ case Hexagon::V6_vL32b_cur_pi_128B:
+ case Hexagon::V6_vL32b_cur_ai_128B:
+ return true;
+ }
+ return false;
+}
+
+
+// Returns true if the instruction is a dot-new instruction, whether it is a
+// predicated dot-new or a register (new-value) dot-new instruction.
+bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
+ if (isNewValueInst(MI) ||
+ (isPredicated(MI) && isPredicatedNew(MI)))
+ return true;
+
+ return false;
+}
+
+
+/// Symmetrical. Check whether these two instructions can form a duplex pair.
+bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa,
+ const MachineInstr *MIb) const {
+ HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa);
+ HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb);
+ return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
+}
+
+
+bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ if (MI->mayLoad() || MI->mayStore() || MI->isCompare())
+ return true;
+
+ // Multiply
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23)
+ return true;
+ return false;
+}
+
+
+bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
+ return (Opcode == Hexagon::ENDLOOP0 ||
+ Opcode == Hexagon::ENDLOOP1);
+}
+
+
+bool HexagonInstrInfo::isExpr(unsigned OpType) const {
+ switch(OpType) {
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ return true;
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+ const MCInstrDesc &MID = MI->getDesc();
+ const uint64_t F = MID.TSFlags;
+ if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+ return true;
+
+ // TODO: This is largely obsolete now. Will need to be removed
+ // in consecutive patches.
+ switch(MI->getOpcode()) {
+ // TFR_FI Remains a special case.
+ case Hexagon::TFR_FI:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+
+// This returns true in two cases:
+// - The OP code itself indicates that this is an extended instruction.
+// - One of MOs has been marked with HMOTF_ConstExtended flag.
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ // First check if this is permanently extended op code.
+ const uint64_t F = MI->getDesc().TSFlags;
+ if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+ return true;
+ // Use MO operand flags to determine if one of MI's operands
+ // has HMOTF_ConstExtended flag set.
+ for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+ E = MI->operands_end(); I != E; ++I) {
+    if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::FPPos) & HexagonII::FPMask;
+}
- return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
+
+// No V60 HVX vector memory access may be paired with an indirect (A_INDIRECT)
+// branch, call or return.
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const {
+ if (!isV60VectorInstruction(I))
+ return false;
+ if (!I->mayLoad() && !I->mayStore())
+ return false;
+ return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
}
-int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
- enum Hexagon::PredSense inPredSense;
- inPredSense = invertPredicate ? Hexagon::PredSense_false :
- Hexagon::PredSense_true;
- int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
- if (CondOpcode >= 0) // Valid Conditional opcode/instruction
- return CondOpcode;
- // This switch case will be removed once all the instructions have been
- // modified to use relation maps.
- switch(Opc) {
- case Hexagon::TFRI_f:
- return !invertPredicate ? Hexagon::TFRI_cPt_f :
- Hexagon::TFRI_cNotPt_f;
- case Hexagon::A2_combinew:
- return !invertPredicate ? Hexagon::C2_ccombinewt :
- Hexagon::C2_ccombinewf;
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_callr :
+ case Hexagon::J2_callrf :
+ case Hexagon::J2_callrt :
+ return true;
+ }
+ return false;
+}
- // DEALLOC_RETURN.
- case Hexagon::L4_return:
- return !invertPredicate ? Hexagon::L4_return_t:
- Hexagon::L4_return_f;
+
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::L4_return :
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_fnew_pt :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ return true;
}
- llvm_unreachable("Unexpected predicable instruction");
+ return false;
}
-bool HexagonInstrInfo::
-PredicateInstruction(MachineInstr *MI,
- ArrayRef<MachineOperand> Cond) const {
- if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
- DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpr :
+ case Hexagon::J2_jumprt :
+ case Hexagon::J2_jumprf :
+ case Hexagon::J2_jumprtnewpt :
+ case Hexagon::J2_jumprfnewpt :
+ case Hexagon::J2_jumprtnew :
+ case Hexagon::J2_jumprfnew :
+ return true;
+ }
+ return false;
+}
+
+
+// Return true if a given MI can accommodate the given offset.
+// Use a conservative estimate rather than the exact number.
+// TODO: This will need to be changed to use the MC-level
+// definition of the instruction's extendable field size.
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+ unsigned offset) const {
+  // This selection of jump instructions matches what AnalyzeBranch can parse,
+  // plus NVJ (new-value jumps).
+ if (isNewValueJump(MI)) // r9:2
+ return isInt<11>(offset);
+
+ switch (MI->getOpcode()) {
+  // Jumps to an address in a register, conditioned on a register value, are
+  // still missing here.
+ default:
return false;
+ case Hexagon::J2_jump: // bits<24> dst; // r22:2
+ case Hexagon::J2_call:
+ case Hexagon::CALLv3nr:
+ return isInt<24>(offset);
+ case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ case Hexagon::J2_callt:
+ case Hexagon::J2_callf:
+ return isInt<17>(offset);
+ case Hexagon::J2_loop0i:
+ case Hexagon::J2_loop0iext:
+ case Hexagon::J2_loop0r:
+ case Hexagon::J2_loop0rext:
+ case Hexagon::J2_loop1i:
+ case Hexagon::J2_loop1iext:
+ case Hexagon::J2_loop1r:
+ case Hexagon::J2_loop1rext:
+ return isInt<9>(offset);
+ // TODO: Add all the compound branches here. Can we do this in Relation model?
+ case Hexagon::J4_cmpeqi_tp0_jump_nt:
+ case Hexagon::J4_cmpeqi_tp1_jump_nt:
+ return isInt<11>(offset);
}
- int Opc = MI->getOpcode();
- assert (isPredicable(MI) && "Expected predicable instruction");
- bool invertJump = predOpcodeHasNot(Cond);
+}
- // We have to predicate MI "in place", i.e. after this function returns,
- // MI will need to be transformed into a predicated form. To avoid com-
- // plicated manipulations with the operands (handling tied operands,
- // etc.), build a new temporary instruction, then overwrite MI with it.
- MachineBasicBlock &B = *MI->getParent();
- DebugLoc DL = MI->getDebugLoc();
- unsigned PredOpc = getCondOpcode(Opc, invertJump);
- MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
- unsigned NOp = 0, NumOps = MI->getNumOperands();
- while (NOp < NumOps) {
- MachineOperand &Op = MI->getOperand(NOp);
- if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
- break;
- T.addOperand(Op);
- NOp++;
+bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+ const MachineInstr *ESMI) const {
+ if (!LRMI || !ESMI)
+ return false;
+
+ bool isLate = isLateResultInstr(LRMI);
+ bool isEarly = isEarlySourceInstr(ESMI);
+
+ DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
+ DEBUG(LRMI->dump());
+ DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
+ DEBUG(ESMI->dump());
+
+ if (isLate && isEarly) {
+ DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
+ return true;
}
- unsigned PredReg, PredRegPos, PredRegFlags;
- bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
- (void)GotPredReg;
- assert(GotPredReg);
- T.addReg(PredReg, PredRegFlags);
- while (NOp < NumOps)
- T.addOperand(MI->getOperand(NOp++));
+ return false;
+}
- MI->setDesc(get(PredOpc));
- while (unsigned n = MI->getNumOperands())
- MI->RemoveOperand(n-1);
- for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
- MI->addOperand(T->getOperand(i));
- MachineBasicBlock::instr_iterator TI = &*T;
- B.erase(TI);
+bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
- MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
- MRI.clearKillFlags(PredReg);
+ switch (MI->getOpcode()) {
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::COPY:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::PHI:
+ return false;
+ default:
+ break;
+ }
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_1_SLOT23:
+ case Hexagon::Sched::EXTENDER_tc_1_SLOT0123:
+ case Hexagon::Sched::S_2op_tc_1_SLOT23:
+ case Hexagon::Sched::S_3op_tc_1_SLOT23:
+ case Hexagon::Sched::V2LDST_tc_ld_SLOT01:
+ case Hexagon::Sched::V2LDST_tc_st_SLOT0:
+ case Hexagon::Sched::V2LDST_tc_st_SLOT01:
+ case Hexagon::Sched::V4LDST_tc_ld_SLOT01:
+ case Hexagon::Sched::V4LDST_tc_st_SLOT0:
+ case Hexagon::Sched::V4LDST_tc_st_SLOT01:
+ return false;
+ }
return true;
}
-bool
-HexagonInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
- return true;
+bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+  // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE use a multiply
+  // resource, but all operands can be received late, like an ALU instruction.
+ return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE;
}
-bool
-HexagonInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles,
- unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles,
- unsigned ExtraFCycles,
- const BranchProbability &Probability) const {
- return true;
+bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+ return Opcode == Hexagon::J2_loop0i ||
+ Opcode == Hexagon::J2_loop0r ||
+ Opcode == Hexagon::J2_loop0iext ||
+ Opcode == Hexagon::J2_loop0rext ||
+ Opcode == Hexagon::J2_loop1i ||
+ Opcode == Hexagon::J2_loop1r ||
+ Opcode == Hexagon::J2_loop1iext ||
+ Opcode == Hexagon::J2_loop1rext;
+}
+
+
+bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case Hexagon::L4_iadd_memopw_io :
+ case Hexagon::L4_isub_memopw_io :
+ case Hexagon::L4_add_memopw_io :
+ case Hexagon::L4_sub_memopw_io :
+ case Hexagon::L4_and_memopw_io :
+ case Hexagon::L4_or_memopw_io :
+ case Hexagon::L4_iadd_memoph_io :
+ case Hexagon::L4_isub_memoph_io :
+ case Hexagon::L4_add_memoph_io :
+ case Hexagon::L4_sub_memoph_io :
+ case Hexagon::L4_and_memoph_io :
+ case Hexagon::L4_or_memoph_io :
+ case Hexagon::L4_iadd_memopb_io :
+ case Hexagon::L4_isub_memopb_io :
+ case Hexagon::L4_add_memopb_io :
+ case Hexagon::L4_sub_memopb_io :
+ case Hexagon::L4_and_memopb_io :
+ case Hexagon::L4_or_memopb_io :
+ case Hexagon::L4_ior_memopb_io:
+ case Hexagon::L4_ior_memoph_io:
+ case Hexagon::L4_ior_memopw_io:
+ case Hexagon::L4_iand_memopb_io:
+ case Hexagon::L4_iand_memoph_io:
+ case Hexagon::L4_iand_memopw_io:
+ return true;
+ }
+ return false;
}
-// Returns true if an instruction is predicated irrespective of the predicate
-// sense. For example, all of the following will return true.
-// if (p0) R1 = add(R2, R3)
-// if (!p0) R1 = add(R2, R3)
-// if (p0.new) R1 = add(R2, R3)
-// if (!p0.new) R1 = add(R2, R3)
-bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
}
-bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
+
+bool HexagonInstrInfo::isNewValue(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
+}
+
- return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+ return isNewValueJump(MI) || isNewValueStore(MI);
}
-bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- assert(isPredicated(MI));
- return (!((F >> HexagonII::PredicatedFalsePos) &
- HexagonII::PredicatedFalseMask));
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ return isNewValue(MI) && MI->isBranch();
}
-bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
+
+bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const {
+ return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
+}
+
+
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
+}
+
+
+bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
+}
- // Make sure that the instruction is predicated.
- assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
- return (!((F >> HexagonII::PredicatedFalsePos) &
- HexagonII::PredicatedFalseMask));
+
+// Returns true if a particular operand is extendable for an instruction.
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+ unsigned OperandNum) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
+ == OperandNum;
}
+
+bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const {
+ return getAddrMode(MI) == HexagonII::PostInc;
+}
+
+
bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
-
assert(isPredicated(MI));
- return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+ return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
}
+
bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
-
assert(isPredicated(Opcode));
- return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+ return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
}
-// Returns true, if a ST insn can be promoted to a new-value store.
-bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
+
+bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
+ return !((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask);
+}
+
- return ((F >> HexagonII::mayNVStorePos) &
- HexagonII::mayNVStoreMask);
+bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ // Make sure that the instruction is predicated.
+ assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+ return !((F >> HexagonII::PredicatedFalsePos) &
+ HexagonII::PredicatedFalseMask);
}
-bool
-HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const {
- for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
- MachineOperand MO = MI->getOperand(oper);
- if (MO.isReg() && MO.isDef()) {
- const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
- if (RC == &Hexagon::PredRegsRegClass) {
- Pred.push_back(MO);
- return true;
- }
- }
+
+bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
+}
+
+
+bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
+}
+
+
+bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ assert(get(Opcode).isBranch() &&
+ (isPredicatedNew(Opcode) || isNewValue(Opcode)));
+ return (F >> HexagonII::TakenPos) & HexagonII::TakenMask;
+}
+
+
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+ return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 ||
+ MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
+}
+
+
+bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::SoloPos) & HexagonII::SoloMask;
+}
+
+
+bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ default:
+ return false;
}
- return false;
}
-bool
-HexagonInstrInfo::
-SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
- ArrayRef<MachineOperand> Pred2) const {
- // TODO: Fix this
- return false;
+// Returns true when MI has the timing class TC1.
+bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_1_SLOT23:
+ case Hexagon::Sched::EXTENDER_tc_1_SLOT0123:
+ //case Hexagon::Sched::M_tc_1_SLOT23:
+ case Hexagon::Sched::S_2op_tc_1_SLOT23:
+ case Hexagon::Sched::S_3op_tc_1_SLOT23:
+ return true;
+
+ default:
+ return false;
+ }
}
-//
-// We indicate that we want to reverse the branch by
-// inserting the reversed branching opcode.
-//
-bool HexagonInstrInfo::ReverseBranchCondition(
- SmallVectorImpl<MachineOperand> &Cond) const {
- if (Cond.empty())
+bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2_SLOT23:
+ case Hexagon::Sched::CR_tc_2_SLOT3:
+ case Hexagon::Sched::M_tc_2_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2_SLOT23:
return true;
- assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
- Opcode_t opcode = Cond[0].getImm();
- //unsigned temp;
- assert(get(opcode).isBranch() && "Should be a branching condition.");
- if (isEndLoopN(opcode))
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const {
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ switch (SchedClass) {
+ case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123:
+ case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123:
+ case Hexagon::Sched::ALU64_tc_2early_SLOT23:
+ case Hexagon::Sched::CR_tc_2early_SLOT23:
+ case Hexagon::Sched::CR_tc_2early_SLOT3:
+ case Hexagon::Sched::J_tc_2early_SLOT0123:
+ case Hexagon::Sched::J_tc_2early_SLOT2:
+ case Hexagon::Sched::J_tc_2early_SLOT23:
+ case Hexagon::Sched::S_2op_tc_2early_SLOT23:
+ case Hexagon::Sched::S_3op_tc_2early_SLOT23:
return true;
- Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode);
- Cond[0].setImm(NewOpcode);
- return false;
+
+ default:
+ return false;
+ }
+}
+
+
+bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ unsigned SchedClass = MI->getDesc().getSchedClass();
+ return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23;
}
-bool HexagonInstrInfo::
-isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs,
- const BranchProbability &Probability) const {
- return (NumInstrs <= 4);
+bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+
+ const uint64_t V = getType(MI);
+ return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST;
}
-bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::L4_return:
- case Hexagon::L4_return_t:
- case Hexagon::L4_return_f:
- case Hexagon::L4_return_tnew_pnt:
- case Hexagon::L4_return_fnew_pnt:
- case Hexagon::L4_return_tnew_pt:
- case Hexagon::L4_return_fnew_pt:
- return true;
+
+// Check if the Offset is a valid auto-increment immediate for the given
+// load/store type.
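+// E.g. (illustrative) for a 64-byte HVX type such as v16i32 the offset must
+// be a multiple of 64 within the auto-increment range; the 128B variants
+// require a multiple of 128.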
+bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const {
+ if (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+ VT == MVT::v32i16 || VT == MVT::v64i8) {
+ return (Offset >= Hexagon_MEMV_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMV_AUTOINC_MAX &&
+ (Offset & 0x3f) == 0);
+ }
+ // 128B
+ if (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+ VT == MVT::v64i16 || VT == MVT::v128i8) {
+ return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B &&
+ Offset <= Hexagon_MEMV_AUTOINC_MAX_128B &&
+ (Offset & 0x7f) == 0);
+ }
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
}
+ llvm_unreachable("Not an auto-inc opc!");
}
@@ -1222,6 +2329,40 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
// misaligns with respect to load size.
switch (Opcode) {
+ case Hexagon::STriq_pred_V6:
+ case Hexagon::STriq_pred_vec_V6:
+ case Hexagon::STriv_pseudo_V6:
+ case Hexagon::STrivv_pseudo_V6:
+ case Hexagon::LDriq_pred_V6:
+ case Hexagon::LDriq_pred_vec_V6:
+ case Hexagon::LDriv_pseudo_V6:
+ case Hexagon::LDrivv_pseudo_V6:
+ case Hexagon::LDrivv_indexed:
+ case Hexagon::STrivv_indexed:
+ case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vL32Ub_ai:
+ case Hexagon::V6_vS32Ub_ai:
+ return (Offset >= Hexagon_MEMV_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMV_OFFSET_MAX);
+
+ case Hexagon::STriq_pred_V6_128B:
+ case Hexagon::STriq_pred_vec_V6_128B:
+ case Hexagon::STriv_pseudo_V6_128B:
+ case Hexagon::STrivv_pseudo_V6_128B:
+ case Hexagon::LDriq_pred_V6_128B:
+ case Hexagon::LDriq_pred_vec_V6_128B:
+ case Hexagon::LDriv_pseudo_V6_128B:
+ case Hexagon::LDrivv_pseudo_V6_128B:
+ case Hexagon::LDrivv_indexed_128B:
+ case Hexagon::STrivv_indexed_128B:
+ case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vL32Ub_ai_128B:
+ case Hexagon::V6_vS32Ub_ai_128B:
+ return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) &&
+ (Offset <= Hexagon_MEMV_OFFSET_MAX_128B);
+
case Hexagon::J2_loop0i:
case Hexagon::J2_loop1i:
return isUInt<10>(Offset);
@@ -1248,8 +2389,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
(Offset <= Hexagon_MEMH_OFFSET_MAX);
case Hexagon::L2_loadrb_io:
- case Hexagon::S2_storerb_io:
case Hexagon::L2_loadrub_io:
+ case Hexagon::S2_storerb_io:
return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
(Offset <= Hexagon_MEMB_OFFSET_MAX);
@@ -1257,28 +2398,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
(Offset <= Hexagon_ADDI_OFFSET_MAX);
- case Hexagon::L4_iadd_memopw_io:
- case Hexagon::L4_isub_memopw_io:
- case Hexagon::L4_add_memopw_io:
- case Hexagon::L4_sub_memopw_io:
- case Hexagon::L4_and_memopw_io:
- case Hexagon::L4_or_memopw_io:
+ case Hexagon::L4_iadd_memopw_io :
+ case Hexagon::L4_isub_memopw_io :
+ case Hexagon::L4_add_memopw_io :
+ case Hexagon::L4_sub_memopw_io :
+ case Hexagon::L4_and_memopw_io :
+ case Hexagon::L4_or_memopw_io :
return (0 <= Offset && Offset <= 255);
- case Hexagon::L4_iadd_memoph_io:
- case Hexagon::L4_isub_memoph_io:
- case Hexagon::L4_add_memoph_io:
- case Hexagon::L4_sub_memoph_io:
- case Hexagon::L4_and_memoph_io:
- case Hexagon::L4_or_memoph_io:
+ case Hexagon::L4_iadd_memoph_io :
+ case Hexagon::L4_isub_memoph_io :
+ case Hexagon::L4_add_memoph_io :
+ case Hexagon::L4_sub_memoph_io :
+ case Hexagon::L4_and_memoph_io :
+ case Hexagon::L4_or_memoph_io :
return (0 <= Offset && Offset <= 127);
- case Hexagon::L4_iadd_memopb_io:
- case Hexagon::L4_isub_memopb_io:
- case Hexagon::L4_add_memopb_io:
- case Hexagon::L4_sub_memopb_io:
- case Hexagon::L4_and_memopb_io:
- case Hexagon::L4_or_memopb_io:
+ case Hexagon::L4_iadd_memopb_io :
+ case Hexagon::L4_isub_memopb_io :
+ case Hexagon::L4_add_memopb_io :
+ case Hexagon::L4_sub_memopb_io :
+ case Hexagon::L4_and_memopb_io :
+ case Hexagon::L4_or_memopb_io :
return (0 <= Offset && Offset <= 63);
// LDri_pred and STriw_pred are pseudo operations, so it has to take offset of
@@ -1291,223 +2432,556 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::TFR_FIA:
case Hexagon::INLINEASM:
return true;
- }
+
+ case Hexagon::L2_ploadrbt_io:
+ case Hexagon::L2_ploadrbf_io:
+ case Hexagon::L2_ploadrubt_io:
+ case Hexagon::L2_ploadrubf_io:
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirbt_io:
+ case Hexagon::S4_storeirbf_io:
+ return isUInt<6>(Offset);
+
+ case Hexagon::L2_ploadrht_io:
+ case Hexagon::L2_ploadrhf_io:
+ case Hexagon::L2_ploadruht_io:
+ case Hexagon::L2_ploadruhf_io:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeirht_io:
+ case Hexagon::S4_storeirhf_io:
+ return isShiftedUInt<6,1>(Offset);
+
+ case Hexagon::L2_ploadrit_io:
+ case Hexagon::L2_ploadrif_io:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S4_storeiri_io:
+ case Hexagon::S4_storeirit_io:
+ case Hexagon::S4_storeirif_io:
+ return isShiftedUInt<6,2>(Offset);
+
+ case Hexagon::L2_ploadrdt_io:
+ case Hexagon::L2_ploadrdf_io:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io:
+ return isShiftedUInt<6,3>(Offset);
+ } // switch
llvm_unreachable("No offset range is defined for this opcode. "
"Please define it in the above switch statement!");
}
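
The predicated load/store cases added above lean on LLVM's isUInt / isShiftedUInt helpers: isShiftedUInt<N,S>(x) holds when x is a multiple of 2^S whose scaled value fits in N bits, which matches the #u6, #u6:1, #u6:2 and #u6:3 offset encodings. A small standalone re-implementation, for illustration only (the real templates live in llvm/Support/MathExtras.h):

#include <cstdint>
#include <cstdio>

template <unsigned N> bool isUIntN(uint64_t x) {
  return x < (UINT64_C(1) << N);                             // fits in N unsigned bits
}
template <unsigned N, unsigned S> bool isShiftedUIntN(uint64_t x) {
  return isUIntN<N + S>(x) && (x % (UINT64_C(1) << S)) == 0; // N-bit value scaled by 2^S
}

int main() {
  // S4_storeirh_io-style offsets: 6-bit unsigned, scaled by 2 (#u6:1).
  std::printf("%d\n", isShiftedUIntN<6, 1>(126)); // 1: 126 == 63*2
  std::printf("%d\n", isShiftedUIntN<6, 1>(127)); // 0: odd
  std::printf("%d\n", isShiftedUIntN<6, 1>(128)); // 0: 64*2 needs 7 payload bits
  return 0;
}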
-//
-// Check if the Offset is a valid auto-inc imm by Load/Store Type.
-//
-bool HexagonInstrInfo::
-isValidAutoIncImm(const EVT VT, const int Offset) const {
+bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const {
+ return MI && isV60VectorInstruction(MI) && isAccumulator(MI);
+}
- if (VT == MVT::i64) {
- return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
- Offset <= Hexagon_MEMD_AUTOINC_MAX &&
- (Offset & 0x7) == 0);
- }
- if (VT == MVT::i32) {
- return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
- Offset <= Hexagon_MEMW_AUTOINC_MAX &&
- (Offset & 0x3) == 0);
- }
- if (VT == MVT::i16) {
- return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
- Offset <= Hexagon_MEMH_AUTOINC_MAX &&
- (Offset & 0x1) == 0);
- }
- if (VT == MVT::i8) {
- return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
- Offset <= Hexagon_MEMB_AUTOINC_MAX);
+
+bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ const uint64_t F = get(MI->getOpcode()).TSFlags;
+ const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+ return
+ V == HexagonII::TypeCVI_VA ||
+ V == HexagonII::TypeCVI_VA_DV;
+}
+
+
+bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI))
+ return true;
+
+ if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI)))
+ return true;
+
+ if (mayBeNewStore(ConsMI))
+ return true;
+
+ return false;
+}
+
+
+/// \brief Can these instructions execute at the same time in a bundle?
+bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First,
+ const MachineInstr *Second) const {
+ if (DisableNVSchedule)
+ return false;
+ if (mayBeNewStore(Second)) {
+ // Make sure the definition of the first instruction is the value being
+ // stored.
+ const MachineOperand &Stored =
+ Second->getOperand(Second->getNumOperands() - 1);
+ if (!Stored.isReg())
+ return false;
+ for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) {
+ const MachineOperand &Op = First->getOperand(i);
+ if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg())
+ return true;
+ }
}
- llvm_unreachable("Not an auto-inc opc!");
+ return false;
+}
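
Stripped of the MachineInstr plumbing, the check above says: the value stored by the would-be new-value store (its last operand) must be defined by the other instruction in the bundle. A simplified model with the operand lists reduced to small structs (illustration only, assuming NV scheduling is enabled and the second instruction may become a new-value store):

#include <cstdio>
#include <vector>

struct Op { bool IsReg, IsDef; int Reg; };
struct Insn { std::vector<Op> Ops; };

static bool canPairWithNewValueStore(const Insn &First, const Insn &Store) {
  const Op &Stored = Store.Ops.back();  // stored value is the last operand
  if (!Stored.IsReg)
    return false;
  for (const Op &O : First.Ops)         // does First define that register?
    if (O.IsReg && O.IsDef && O.Reg == Stored.Reg)
      return true;
  return false;
}

int main() {
  Insn Add{{{true, true, 2}, {true, false, 3}, {true, false, 4}}};     // r2 = add(r3,r4)
  Insn Store{{{true, false, 0}, {false, false, 0}, {true, false, 2}}}; // memw(r0+#0) = r2
  std::printf("%d\n", canPairWithNewValueStore(Add, Store));           // 1
  return 0;
}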
+
+
+bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
+ for (auto &I : *B)
+ if (I.isEHLabel())
+ return true;
+ return false;
}
-bool HexagonInstrInfo::
-isMemOp(const MachineInstr *MI) const {
-// return MI->getDesc().mayLoad() && MI->getDesc().mayStore();
-
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::L4_iadd_memopw_io:
- case Hexagon::L4_isub_memopw_io:
- case Hexagon::L4_add_memopw_io:
- case Hexagon::L4_sub_memopw_io:
- case Hexagon::L4_and_memopw_io:
- case Hexagon::L4_or_memopw_io:
- case Hexagon::L4_iadd_memoph_io:
- case Hexagon::L4_isub_memoph_io:
- case Hexagon::L4_add_memoph_io:
- case Hexagon::L4_sub_memoph_io:
- case Hexagon::L4_and_memoph_io:
- case Hexagon::L4_or_memoph_io:
- case Hexagon::L4_iadd_memopb_io:
- case Hexagon::L4_isub_memopb_io:
- case Hexagon::L4_add_memopb_io:
- case Hexagon::L4_sub_memopb_io:
- case Hexagon::L4_and_memopb_io:
- case Hexagon::L4_or_memopb_io:
- case Hexagon::L4_ior_memopb_io:
- case Hexagon::L4_ior_memoph_io:
- case Hexagon::L4_ior_memopw_io:
- case Hexagon::L4_iand_memopb_io:
- case Hexagon::L4_iand_memoph_io:
- case Hexagon::L4_iand_memopw_io:
+// Returns true if an instruction can be converted into a non-extended
+// equivalent instruction.
+bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const {
+ short NonExtOpcode;
+  // Check whether the instruction has a register form that uses a register in
+  // place of the extended operand; if so, a non-extended form exists.
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ return true;
+
+ if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ // Check addressing mode and retrieve non-ext equivalent instruction.
+
+ switch (getAddrMode(MI)) {
+ case HexagonII::Absolute :
+ // Load/store with absolute addressing mode can be converted into
+ // base+offset mode.
+ NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseImmOffset :
+ // Load/store with base+offset addressing mode can be converted into
+      // base+register offset addressing mode. However, the left-shift operand
+      // should be set to 0.
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ break;
+ case HexagonII::BaseLongOffset:
+ NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode());
+ break;
+ default:
+ return false;
+ }
+ if (NonExtOpcode < 0)
+ return false;
return true;
}
return false;
}
-bool HexagonInstrInfo::
-isSpillPredRegOp(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::STriw_pred :
- case Hexagon::LDriw_pred :
+bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(),
+ Hexagon::InstrType_Pseudo) >= 0;
+}
+
+
+bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
+ const {
+ MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
+ while (I != E) {
+ if (I->isBarrier())
return true;
+ ++I;
}
+ return false;
}
-bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::C2_cmpeq:
- case Hexagon::C2_cmpeqi:
- case Hexagon::C2_cmpgt:
- case Hexagon::C2_cmpgti:
- case Hexagon::C2_cmpgtu:
- case Hexagon::C2_cmpgtui:
+
+// Returns true if an LD insn can be promoted to a .cur load.
+bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const {
+ auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>();
+ const uint64_t F = MI->getDesc().TSFlags;
+ return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) &&
+ HST.hasV60TOps();
+}
+
+
+// Returns true if an ST insn can be promoted to a new-value store.
+bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
+}
+
+
+bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const {
+ // There is no stall when ProdMI is not a V60 vector.
+ if (!isV60VectorInstruction(ProdMI))
+ return false;
+
+ // There is no stall when ProdMI and ConsMI are not dependent.
+ if (!isDependent(ProdMI, ConsMI))
+ return false;
+
+ // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI
+ // are scheduled in consecutive packets.
+ if (isVecUsableNextPacket(ProdMI, ConsMI))
+ return false;
+
+ return true;
+}
+
+
+bool HexagonInstrInfo::producesStall(const MachineInstr *MI,
+ MachineBasicBlock::const_instr_iterator BII) const {
+  // There is no stall when MI is not a V60 vector instruction.
+ if (!isV60VectorInstruction(MI))
+ return false;
+
+ MachineBasicBlock::const_instr_iterator MII = BII;
+ MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end();
+
+ if (!(*MII).isBundle()) {
+ const MachineInstr *J = &*MII;
+ if (!isV60VectorInstruction(J))
+ return false;
+ else if (isVecUsableNextPacket(J, MI))
+ return false;
+ return true;
+ }
+
+ for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) {
+ const MachineInstr *J = &*MII;
+ if (producesStall(J, MI))
return true;
}
+ return false;
+}
+
+
+bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI,
+ unsigned PredReg) const {
+ for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg))
+ return false; // Predicate register must be explicitly defined.
+ }
+
+  // The Hexagon Programmer's Reference says that decbin, memw_locked, and
+  // memd_locked also cannot be used as .new, but we don't seem to have these
+  // instructions defined.
+ return MI->getOpcode() != Hexagon::A4_tlbmatch;
+}
+
+
+bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
+ return (Opcode == Hexagon::J2_jumpt) ||
+ (Opcode == Hexagon::J2_jumpf) ||
+ (Opcode == Hexagon::J2_jumptnew) ||
+ (Opcode == Hexagon::J2_jumpfnew) ||
+ (Opcode == Hexagon::J2_jumptnewpt) ||
+ (Opcode == Hexagon::J2_jumpfnewpt);
+}
+
+
+bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
+ if (Cond.empty() || !isPredicated(Cond[0].getImm()))
+ return false;
+ return !isPredicatedTrue(Cond[0].getImm());
+}
+
+
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
+}
+
+
+// Returns the base register in a memory access (load/store). The offset is
+// returned in Offset and the access size is returned in AccessSize.
+unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI,
+ int &Offset, unsigned &AccessSize) const {
+  // Return 0 if it is not a base+offset-type instruction, a MemOp, or a
+  // post-increment.
+ if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
+ getAddrMode(MI) != HexagonII::BaseLongOffset &&
+ !isMemOp(MI) && !isPostIncrement(MI))
+ return 0;
+
+ // Since it is a memory access instruction, getMemAccessSize() should never
+ // return 0.
+ assert (getMemAccessSize(MI) &&
+ "BaseImmOffset or BaseLongOffset or MemOp without accessSize");
+
+  // Return values of getMemAccessSize() are:
+  // 0 - ruled out by the assert above.
+  // 1, 2, 3, 4 and 7, 8 - the statement below is correct for all of these.
+  // MemAccessSize is encoded as 1+log2(N), where N is the access size in bytes.
+ AccessSize = (1U << (getMemAccessSize(MI) - 1));
+
+ unsigned basePos = 0, offsetPos = 0;
+ if (!getBaseAndOffsetPosition(MI, basePos, offsetPos))
+ return 0;
+
+ // Post increment updates its EA after the mem access,
+ // so we need to treat its offset as zero.
+ if (isPostIncrement(MI))
+ Offset = 0;
+ else {
+ Offset = MI->getOperand(offsetPos).getImm();
+ }
+
+ return MI->getOperand(basePos).getReg();
+}
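
A quick standalone check of the decoding used above, i.e. that 1 << (MemAccessSize - 1) yields the access size in bytes for every encoding the comment lists. Treating encodings 7 and 8 as the 64-byte and 128-byte HVX accesses is an assumption based on the resulting sizes; the enum itself is defined elsewhere.

#include <cstdio>

int main() {
  // 1..4: byte, halfword, word, doubleword; 7, 8: presumably 64B/128B vectors.
  const unsigned Enc[] = {1, 2, 3, 4, 7, 8};
  for (unsigned M : Enc)
    std::printf("encoding %u -> %u bytes\n", M, 1u << (M - 1));
  return 0;
}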
+
+
+/// Return the position of the base and offset operands for this instruction.
+bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI,
+ unsigned &BasePos, unsigned &OffsetPos) const {
+ // Deal with memops first.
+ if (isMemOp(MI)) {
+ assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Bad Memop.");
+ BasePos = 0;
+ OffsetPos = 1;
+ } else if (MI->mayStore()) {
+ BasePos = 0;
+ OffsetPos = 1;
+ } else if (MI->mayLoad()) {
+ BasePos = 1;
+ OffsetPos = 2;
+ } else
+ return false;
+
+ if (isPredicated(MI)) {
+ BasePos++;
+ OffsetPos++;
+ }
+ if (isPostIncrement(MI)) {
+ BasePos++;
+ OffsetPos++;
+ }
+
+ if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm())
+ return false;
+
+ return true;
+}
+
+
+// Inserts branching instructions in reverse order of their occurrence.
+// e.g. jump_t t1 (i1)
+// jump t2 (i2)
+// Jumpers = {i2, i1}
+SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
+ MachineBasicBlock& MBB) const {
+ SmallVector<MachineInstr*, 2> Jumpers;
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ if (I == MBB.instr_begin())
+ return Jumpers;
+
+  // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+  // It has two successors but no terminator; we don't know how to handle it.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return Jumpers;
+ } while (I != MBB.instr_begin());
+
+ I = MBB.instr_end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.instr_begin())
+ return Jumpers;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(&*I))
+ return Jumpers;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = &*I;
+ Jumpers.push_back(LastInst);
+ MachineInstr *SecondLastInst = nullptr;
+ // Find one more terminator if present.
+ do {
+ if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
+ if (!SecondLastInst) {
+ SecondLastInst = &*I;
+ Jumpers.push_back(SecondLastInst);
+ } else // This is a third branch.
+ return Jumpers;
+ }
+ if (I == MBB.instr_begin())
+ break;
+ --I;
+ } while (true);
+ return Jumpers;
}
-bool HexagonInstrInfo::
-isConditionalTransfer (const MachineInstr *MI) const {
+
+// Returns the operand index of the extendable (constant-extended) operand.
+unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask;
+}
+
+// See if the instruction could potentially be a compound-pair candidate.
+// If so, return its group; HCG_None otherwise.
+HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
+ const MachineInstr *MI) const {
+ unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+
switch (MI->getOpcode()) {
- default: return false;
- case Hexagon::A2_tfrt:
- case Hexagon::A2_tfrf:
- case Hexagon::C2_cmoveit:
- case Hexagon::C2_cmoveif:
- case Hexagon::A2_tfrtnew:
- case Hexagon::A2_tfrfnew:
- case Hexagon::C2_cmovenewit:
- case Hexagon::C2_cmovenewif:
- return true;
+ default:
+ return HexagonII::HCG_None;
+ //
+ // Compound pairs.
+ // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2"
+ // "Rd16=#U6 ; jump #r9:2"
+ // "Rd16=Rs16 ; jump #r9:2"
+ //
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtu:
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+ ((isUInt<5>(MI->getOperand(2).getImm())) ||
+ (MI->getOperand(2).getImm() == -1)))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::A2_tfrsi:
+ // Rd = #u6
+    // Do not test for the #u6 size, since the constant is extended regardless
+    // and a compound could still be formed.
+ DstReg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(DstReg))
+ return HexagonII::HCG_A;
+ break;
+ case Hexagon::S2_tstbit_i:
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+ MI->getOperand(2).isImm() &&
+ isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0))
+ return HexagonII::HCG_A;
+ break;
+  // The fact that the .new form is used pretty much guarantees that the
+  // predicate register will match. Nevertheless, there could be some false
+  // positives without additional checking.
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnewpt:
+ Src1Reg = MI->getOperand(0).getReg();
+ if (Hexagon::PredRegsRegClass.contains(Src1Reg) &&
+ (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg))
+ return HexagonII::HCG_B;
+ break;
+ // Transfer and jump:
+ // Rd=#U6 ; jump #r9:2
+ // Rd=Rs ; jump #r9:2
+ // Do not test for jump range here.
+ case Hexagon::J2_jump:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ return HexagonII::HCG_C;
+ break;
}
+
+ return HexagonII::HCG_None;
+}
+
+
+// Returns -1 when there is no opcode found.
+unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA,
+ const MachineInstr *GB) const {
+ assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A);
+ assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B);
+ if ((GA->getOpcode() != Hexagon::C2_cmpeqi) ||
+ (GB->getOpcode() != Hexagon::J2_jumptnew))
+ return -1;
+ unsigned DestReg = GA->getOperand(0).getReg();
+ if (!GB->readsRegister(DestReg))
+ return -1;
+ if (DestReg == Hexagon::P0)
+ return Hexagon::J4_cmpeqi_tp0_jump_nt;
+ if (DestReg == Hexagon::P1)
+ return Hexagon::J4_cmpeqi_tp1_jump_nt;
+ return -1;
}
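
Only one compound pairing is actually mapped above: a C2_cmpeqi writing P0 or P1, followed by a J2_jumptnew that reads that predicate, folds into J4_cmpeqi_tp0_jump_nt or J4_cmpeqi_tp1_jump_nt. A toy sketch of that decision with opcodes as plain strings (the real code also verifies that the jump reads the compare's destination register):

#include <cstdio>
#include <cstring>

static const char *compoundOpcode(const char *Cmp, const char *Jmp, int PredReg) {
  if (std::strcmp(Cmp, "C2_cmpeqi") != 0 || std::strcmp(Jmp, "J2_jumptnew") != 0)
    return nullptr;                      // no compound form for this pair
  if (PredReg == 0)
    return "J4_cmpeqi_tp0_jump_nt";      // compare wrote P0
  if (PredReg == 1)
    return "J4_cmpeqi_tp1_jump_nt";      // compare wrote P1
  return nullptr;
}

int main() {
  const char *Op = compoundOpcode("C2_cmpeqi", "J2_jumptnew", 0);
  std::printf("%s\n", Op ? Op : "(none)"); // J4_cmpeqi_tp0_jump_nt
  return 0;
}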
-bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::A2_paddf:
- case Hexagon::A2_paddfnew:
- case Hexagon::A2_paddt:
- case Hexagon::A2_paddtnew:
- case Hexagon::A2_pandf:
- case Hexagon::A2_pandfnew:
- case Hexagon::A2_pandt:
- case Hexagon::A2_pandtnew:
- case Hexagon::A4_paslhf:
- case Hexagon::A4_paslhfnew:
- case Hexagon::A4_paslht:
- case Hexagon::A4_paslhtnew:
- case Hexagon::A4_pasrhf:
- case Hexagon::A4_pasrhfnew:
- case Hexagon::A4_pasrht:
- case Hexagon::A4_pasrhtnew:
- case Hexagon::A2_porf:
- case Hexagon::A2_porfnew:
- case Hexagon::A2_port:
- case Hexagon::A2_portnew:
- case Hexagon::A2_psubf:
- case Hexagon::A2_psubfnew:
- case Hexagon::A2_psubt:
- case Hexagon::A2_psubtnew:
- case Hexagon::A2_pxorf:
- case Hexagon::A2_pxorfnew:
- case Hexagon::A2_pxort:
- case Hexagon::A2_pxortnew:
- case Hexagon::A4_psxthf:
- case Hexagon::A4_psxthfnew:
- case Hexagon::A4_psxtht:
- case Hexagon::A4_psxthtnew:
- case Hexagon::A4_psxtbf:
- case Hexagon::A4_psxtbfnew:
- case Hexagon::A4_psxtbt:
- case Hexagon::A4_psxtbtnew:
- case Hexagon::A4_pzxtbf:
- case Hexagon::A4_pzxtbfnew:
- case Hexagon::A4_pzxtbt:
- case Hexagon::A4_pzxtbtnew:
- case Hexagon::A4_pzxthf:
- case Hexagon::A4_pzxthfnew:
- case Hexagon::A4_pzxtht:
- case Hexagon::A4_pzxthtnew:
- case Hexagon::A2_paddit:
- case Hexagon::A2_paddif:
- case Hexagon::C2_ccombinewt:
- case Hexagon::C2_ccombinewf:
- return true;
+
+int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
+ enum Hexagon::PredSense inPredSense;
+ inPredSense = invertPredicate ? Hexagon::PredSense_false :
+ Hexagon::PredSense_true;
+ int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
+ if (CondOpcode >= 0) // Valid Conditional opcode/instruction
+ return CondOpcode;
+
+ // This switch case will be removed once all the instructions have been
+ // modified to use relation maps.
+ switch(Opc) {
+ case Hexagon::TFRI_f:
+ return !invertPredicate ? Hexagon::TFRI_cPt_f :
+ Hexagon::TFRI_cNotPt_f;
}
+
+ llvm_unreachable("Unexpected predicable instruction");
}
-bool HexagonInstrInfo::
-isConditionalLoad (const MachineInstr* MI) const {
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::L2_ploadrdt_io :
- case Hexagon::L2_ploadrdf_io:
- case Hexagon::L2_ploadrit_io:
- case Hexagon::L2_ploadrif_io:
- case Hexagon::L2_ploadrht_io:
- case Hexagon::L2_ploadrhf_io:
- case Hexagon::L2_ploadrbt_io:
- case Hexagon::L2_ploadrbf_io:
- case Hexagon::L2_ploadruht_io:
- case Hexagon::L2_ploadruhf_io:
- case Hexagon::L2_ploadrubt_io:
- case Hexagon::L2_ploadrubf_io:
- case Hexagon::L2_ploadrdt_pi:
- case Hexagon::L2_ploadrdf_pi:
- case Hexagon::L2_ploadrit_pi:
- case Hexagon::L2_ploadrif_pi:
- case Hexagon::L2_ploadrht_pi:
- case Hexagon::L2_ploadrhf_pi:
- case Hexagon::L2_ploadrbt_pi:
- case Hexagon::L2_ploadrbf_pi:
- case Hexagon::L2_ploadruht_pi:
- case Hexagon::L2_ploadruhf_pi:
- case Hexagon::L2_ploadrubt_pi:
- case Hexagon::L2_ploadrubf_pi:
- case Hexagon::L4_ploadrdt_rr:
- case Hexagon::L4_ploadrdf_rr:
- case Hexagon::L4_ploadrbt_rr:
- case Hexagon::L4_ploadrbf_rr:
- case Hexagon::L4_ploadrubt_rr:
- case Hexagon::L4_ploadrubf_rr:
- case Hexagon::L4_ploadrht_rr:
- case Hexagon::L4_ploadrhf_rr:
- case Hexagon::L4_ploadruht_rr:
- case Hexagon::L4_ploadruhf_rr:
- case Hexagon::L4_ploadrit_rr:
- case Hexagon::L4_ploadrif_rr:
- return true;
+
+// Return the .cur (current) value instruction for a given vector load.
+int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const {
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown .cur type");
+ case Hexagon::V6_vL32b_pi:
+ return Hexagon::V6_vL32b_cur_pi;
+ case Hexagon::V6_vL32b_ai:
+ return Hexagon::V6_vL32b_cur_ai;
+ //128B
+ case Hexagon::V6_vL32b_pi_128B:
+ return Hexagon::V6_vL32b_cur_pi_128B;
+ case Hexagon::V6_vL32b_ai_128B:
+ return Hexagon::V6_vL32b_cur_ai_128B;
}
+ return 0;
}
-// Returns true if an instruction is a conditional store.
-//
-// Note: It doesn't include conditional new-value stores as they can't be
-// converted to .new predicate.
+
+
+// The diagram below shows the steps involved in the conversion of a predicated
+// store instruction to its .new predicated new-value form.
//
// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
// ^ ^
@@ -1524,8 +2998,6 @@ isConditionalLoad (const MachineInstr* MI) const {
// p.old store
// [if (p0)memw(R0+#0)=R2]
//
-// The above diagram shows the steps involoved in the conversion of a predicated
-// store instruction to its .new predicated new-value form.
//
// The following set of instructions further explains the scenario where
// conditional new-value store becomes invalid when promoted to .new predicate
@@ -1538,105 +3010,33 @@ isConditionalLoad (const MachineInstr* MI) const {
// the first two instructions because in instr 1, r0 is conditional on old value
// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which
// is not valid for new-value stores.
-bool HexagonInstrInfo::
-isConditionalStore (const MachineInstr* MI) const {
- switch (MI->getOpcode())
- {
- default: return false;
- case Hexagon::S4_storeirbt_io:
- case Hexagon::S4_storeirbf_io:
- case Hexagon::S4_pstorerbt_rr:
- case Hexagon::S4_pstorerbf_rr:
- case Hexagon::S2_pstorerbt_io:
- case Hexagon::S2_pstorerbf_io:
- case Hexagon::S2_pstorerbt_pi:
- case Hexagon::S2_pstorerbf_pi:
- case Hexagon::S2_pstorerdt_io:
- case Hexagon::S2_pstorerdf_io:
- case Hexagon::S4_pstorerdt_rr:
- case Hexagon::S4_pstorerdf_rr:
- case Hexagon::S2_pstorerdt_pi:
- case Hexagon::S2_pstorerdf_pi:
- case Hexagon::S2_pstorerht_io:
- case Hexagon::S2_pstorerhf_io:
- case Hexagon::S4_storeirht_io:
- case Hexagon::S4_storeirhf_io:
- case Hexagon::S4_pstorerht_rr:
- case Hexagon::S4_pstorerhf_rr:
- case Hexagon::S2_pstorerht_pi:
- case Hexagon::S2_pstorerhf_pi:
- case Hexagon::S2_pstorerit_io:
- case Hexagon::S2_pstorerif_io:
- case Hexagon::S4_storeirit_io:
- case Hexagon::S4_storeirif_io:
- case Hexagon::S4_pstorerit_rr:
- case Hexagon::S4_pstorerif_rr:
- case Hexagon::S2_pstorerit_pi:
- case Hexagon::S2_pstorerif_pi:
-
- // V4 global address store before promoting to dot new.
- case Hexagon::S4_pstorerdt_abs:
- case Hexagon::S4_pstorerdf_abs:
- case Hexagon::S4_pstorerbt_abs:
- case Hexagon::S4_pstorerbf_abs:
- case Hexagon::S4_pstorerht_abs:
- case Hexagon::S4_pstorerhf_abs:
- case Hexagon::S4_pstorerit_abs:
- case Hexagon::S4_pstorerif_abs:
- return true;
-
- // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
- // from the "Conditional Store" list. Because a predicated new value store
- // would NOT be promoted to a double dot new store. See diagram below:
- // This function returns yes for those stores that are predicated but not
- // yet promoted to predicate dot new instructions.
- //
- // +---------------------+
- // /-----| if (p0) memw(..)=r0 |---------\~
- // || +---------------------+ ||
- // promote || /\ /\ || promote
- // || /||\ /||\ ||
- // \||/ demote || \||/
- // \/ || || \/
- // +-------------------------+ || +-------------------------+
- // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
- // +-------------------------+ || +-------------------------+
- // || || ||
- // || demote \||/
- // promote || \/ NOT possible
- // || || /\~
- // \||/ || /||\~
- // \/ || ||
- // +-----------------------------+
- // | if (p0.new) memw(..)=r0.new |
- // +-----------------------------+
- // Double Dot New Store
- //
- }
-}
-
-
-bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
- if (isNewValue(MI) && isBranch(MI))
- return true;
- return false;
-}
-
-bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const {
- return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
-}
-
-bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
- return (getAddrMode(MI) == HexagonII::PostInc);
-}
-
-// Returns true, if any one of the operands is a dot new
-// insn, whether it is predicated dot new or register dot new.
-bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
- return (isNewValueInst(MI) ||
- (isPredicated(MI) && isPredicatedNew(MI)));
-}
-
+// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+// from the "Conditional Store" list. Because a predicated new value store
+// would NOT be promoted to a double dot new store. See diagram below:
+// This function returns yes for those stores that are predicated but not
+// yet promoted to predicate dot new instructions.
+//
+// +---------------------+
+// /-----| if (p0) memw(..)=r0 |---------\~
+// || +---------------------+ ||
+// promote || /\ /\ || promote
+// || /||\ /||\ ||
+// \||/ demote || \||/
+// \/ || || \/
+// +-------------------------+ || +-------------------------+
+// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
+// +-------------------------+ || +-------------------------+
+// || || ||
+// || demote \||/
+// promote || \/ NOT possible
+// || || /\~
+// \||/ || /||\~
+// \/ || ||
+// +-----------------------------+
+// | if (p0.new) memw(..)=r0.new |
+// +-----------------------------+
+// Double Dot New Store
+//
// Returns the most basic instruction for the .new predicated instructions and
// new-value stores.
// For example, all of the following instructions will be converted back to the
@@ -1645,24 +3045,23 @@ bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const {
// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1
// 3) if (p0.new) memw(R0+#0) = R1 --->
//
+// To understand the translation of instruction 1 to its original form, consider
+// a packet with 3 instructions.
+// { p0 = cmp.eq(R0,R1)
+// if (p0.new) R2 = add(R3, R4)
+// R5 = add (R3, R1)
+// }
+// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. The promotion happens in steps: first the
+// predicate register is promoted to .new, and in the next iteration R2 is
+// promoted. Therefore, if the dependence check fails (due to R5) during the
+// next iteration, the instruction should be converted back to its most basic
+// form.
-int HexagonInstrInfo::GetDotOldOp(const int opc) const {
- int NewOp = opc;
- if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
- NewOp = Hexagon::getPredOldOpcode(NewOp);
- assert(NewOp >= 0 &&
- "Couldn't change predicate new instruction to its old form.");
- }
-
- if (isNewValueStore(NewOp)) { // Convert into non-new-value format
- NewOp = Hexagon::getNonNVStore(NewOp);
- assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
- }
- return NewOp;
-}
// Return the new value instruction for a given store.
-int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
+int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const {
int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode());
if (NVOpcode >= 0) // Valid new-value store instruction.
return NVOpcode;
@@ -1672,12 +3071,6 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
- case Hexagon::S4_storerh_ur:
- return Hexagon::S4_storerhnew_ur;
-
- case Hexagon::S4_storeri_ur:
- return Hexagon::S4_storerinew_ur;
-
case Hexagon::S2_storerb_pci:
return Hexagon::S2_storerb_pci;
@@ -1692,203 +3085,496 @@ int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const {
case Hexagon::S2_storerf_pci:
return Hexagon::S2_storerf_pci;
+
+ case Hexagon::V6_vS32b_ai:
+ return Hexagon::V6_vS32b_new_ai;
+
+ case Hexagon::V6_vS32b_pi:
+ return Hexagon::V6_vS32b_new_pi;
+
+ // 128B
+ case Hexagon::V6_vS32b_ai_128B:
+ return Hexagon::V6_vS32b_new_ai_128B;
+
+ case Hexagon::V6_vS32b_pi_128B:
+ return Hexagon::V6_vS32b_new_pi_128B;
}
return 0;
}
-// Return .new predicate version for an instruction.
-int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI,
- const MachineBranchProbabilityInfo
- *MBPI) const {
+// Returns the opcode to use when converting MI, which is a conditional jump,
+// into a conditional instruction which uses the .new value of the predicate.
+// We also use branch probabilities to add a hint to the jump.
+int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const {
+  // We assume that the block can have at most two successors.
+ bool taken = false;
+ const MachineBasicBlock *Src = MI->getParent();
+ const MachineOperand *BrTarget = &MI->getOperand(1);
+ const MachineBasicBlock *Dst = BrTarget->getMBB();
+ const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
+ if (Prediction >= BranchProbability(1,2))
+ taken = true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::J2_jumpt:
+ return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
+ case Hexagon::J2_jumpf:
+ return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
+
+ default:
+ llvm_unreachable("Unexpected jump instruction.");
+ }
+}
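
The hint selection above in isolation: if the edge to the branch target is predicted taken (probability >= 1/2), the ':pt' .new form is chosen, otherwise the plain .new form. A standalone sketch with probabilities as doubles instead of llvm::BranchProbability:

#include <cstdio>

static const char *newJumpFor(bool JumpT, double EdgeProb) {
  bool Taken = EdgeProb >= 0.5;
  if (JumpT)
    return Taken ? "J2_jumptnewpt" : "J2_jumptnew";
  return Taken ? "J2_jumpfnewpt" : "J2_jumpfnew";
}

int main() {
  std::printf("%s\n", newJumpFor(true, 0.8));  // J2_jumptnewpt
  std::printf("%s\n", newJumpFor(false, 0.1)); // J2_jumpfnew
  return 0;
}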
+
+
+// Return .new predicate version for an instruction.
+int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const {
int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
if (NewOpcode >= 0) // Valid predicate new instruction
return NewOpcode;
switch (MI->getOpcode()) {
- default: llvm_unreachable("Unknown .new type");
// Condtional Jumps
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
return getDotNewPredJumpOp(MI, MBPI);
- case Hexagon::J2_jumprt:
- return Hexagon::J2_jumptnewpt;
-
- case Hexagon::J2_jumprf:
- return Hexagon::J2_jumprfnewpt;
-
- case Hexagon::JMPrett:
- return Hexagon::J2_jumprtnewpt;
-
- case Hexagon::JMPretf:
- return Hexagon::J2_jumprfnewpt;
-
-
- // Conditional combine
- case Hexagon::C2_ccombinewt:
- return Hexagon::C2_ccombinewnewt;
- case Hexagon::C2_ccombinewf:
- return Hexagon::C2_ccombinewnewf;
+ default:
+ assert(0 && "Unknown .new type");
}
+ return 0;
}
-unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+int HexagonInstrInfo::getDotOldOp(const int opc) const {
+ int NewOp = opc;
+ if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
+ NewOp = Hexagon::getPredOldOpcode(NewOp);
+ assert(NewOp >= 0 &&
+ "Couldn't change predicate new instruction to its old form.");
+ }
- return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask);
+ if (isNewValueStore(NewOp)) { // Convert into non-new-value format
+ NewOp = Hexagon::getNonNVStore(NewOp);
+ assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
+ }
+ return NewOp;
}
-/// immediateExtend - Changes the instruction in place to one using an immediate
-/// extender.
-void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
- assert((isExtendable(MI)||isConstExtended(MI)) &&
- "Instruction must be extendable");
- // Find which operand is extendable.
- short ExtOpNum = getCExtOpNum(MI);
- MachineOperand &MO = MI->getOperand(ExtOpNum);
- // This needs to be something we understand.
- assert((MO.isMBB() || MO.isImm()) &&
- "Branch with unknown extendable field type");
- // Mark given operand as extended.
- MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
-}
-DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
- const TargetSubtargetInfo &STI) const {
- const InstrItineraryData *II = STI.getInstrItineraryData();
- return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
-}
-
-bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const {
- // Debug info is never a scheduling boundary. It's necessary to be explicit
- // due to the special treatment of IT instructions below, otherwise a
- // dbg_value followed by an IT will result in the IT instruction being
- // considered a scheduling hazard, which is wrong. It should be the actual
- // instruction preceding the dbg_value instruction(s), just like it is
- // when debug info is not present.
- if (MI->isDebugValue())
- return false;
+// See if instruction could potentially be a duplex candidate.
+// If so, return its group. Zero otherwise.
+HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
+ const MachineInstr *MI) const {
+ unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+ auto &HRI = getRegisterInfo();
- // Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isPosition() || MI->isInlineAsm())
- return true;
+ switch (MI->getOpcode()) {
+ default:
+ return HexagonII::HSIG_None;
+ //
+ // Group L1:
+ //
+ // Rd = memw(Rs+#u4:2)
+ // Rd = memub(Rs+#u4:0)
+ case Hexagon::L2_loadri_io:
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ // Special case this one from Group L2.
+ // Rd = memw(r29+#u5:2)
+ if (isIntRegForSubInst(DstReg)) {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<5,2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ // Rd = memw(Rs+#u4:2)
+ if (isIntRegForSubInst(SrcReg) &&
+ (MI->getOperand(2).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(2).getImm())))
+ return HexagonII::HSIG_L1;
+ }
+ break;
+ case Hexagon::L2_loadrub_io:
+ // Rd = memub(Rs+#u4:0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L1;
+ break;
+ //
+ // Group L2:
+ //
+ // Rd = memh/memuh(Rs+#u3:1)
+ // Rd = memb(Rs+#u3:0)
+ // Rd = memw(r29+#u5:2) - Handled above.
+ // Rdd = memd(r29+#u5:3)
+ // deallocframe
+ // [if ([!]p0[.new])] dealloc_return
+ // [if ([!]p0[.new])] jumpr r31
+ case Hexagon::L2_loadrh_io:
+ case Hexagon::L2_loadruh_io:
+ // Rd = memh/memuh(Rs+#u3:1)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<3,1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L2_loadrb_io:
+ // Rd = memb(Rs+#u3:0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ isUInt<3>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L2_loadrd_io:
+ // Rdd = memd(r29+#u5:3)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) &&
+ Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg &&
+ MI->getOperand(2).isImm() &&
+ isShiftedUInt<5,3>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_L2;
+ break;
+  // dealloc_return is not documented in the Hexagon Manual, but is marked
+  // with the A_SUBINSN attribute in iset_v4classic.py.
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ case Hexagon::L4_return:
+ case Hexagon::L2_deallocframe:
+ return HexagonII::HSIG_L2;
+ case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::JMPret :
+ // jumpr r31
+ // Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
+ DstReg = MI->getOperand(0).getReg();
+ if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::JMPrett:
+ case Hexagon::JMPretf:
+ case Hexagon::JMPrettnewpt:
+ case Hexagon::JMPretfnewpt :
+ case Hexagon::JMPrettnew :
+ case Hexagon::JMPretfnew :
+ DstReg = MI->getOperand(1).getReg();
+ SrcReg = MI->getOperand(0).getReg();
+ // [if ([!]p0[.new])] jumpr r31
+ if ((Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ (Hexagon::P0 == SrcReg)) &&
+ (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)))
+ return HexagonII::HSIG_L2;
+ break;
+ case Hexagon::L4_return_t :
+ case Hexagon::L4_return_f :
+ case Hexagon::L4_return_tnew_pnt :
+ case Hexagon::L4_return_fnew_pnt :
+ case Hexagon::L4_return_tnew_pt :
+ case Hexagon::L4_return_fnew_pt :
+ // [if ([!]p0[.new])] dealloc_return
+ SrcReg = MI->getOperand(0).getReg();
+ if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg))
+ return HexagonII::HSIG_L2;
+ break;
+ //
+ // Group S1:
+ //
+ // memw(Rs+#u4:2) = Rt
+ // memb(Rs+#u4:0) = Rt
+ case Hexagon::S2_storeri_io:
+ // Special case this one from Group S2.
+ // memw(r29+#u5:2) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+ isIntRegForSubInst(Src2Reg) &&
+ HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+ isShiftedUInt<5,2>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S2;
+ // memw(Rs+#u4:2) = Rt
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ case Hexagon::S2_storerb_io:
+ // memb(Rs+#u4:0) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ //
+ // Group S2:
+ //
+ // memh(Rs+#u3:1) = Rt
+ // memw(r29+#u5:2) = Rt
+ // memd(r29+#s6:3) = Rtt
+ // memw(Rs+#u4:2) = #U1
+ // memb(Rs+#u4) = #U1
+ // allocframe(#u5:3)
+ case Hexagon::S2_storerh_io:
+ // memh(Rs+#u3:1) = Rt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
+ MI->getOperand(1).isImm() &&
+ isShiftedUInt<3,1>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ case Hexagon::S2_storerd_io:
+ // memd(r29+#s6:3) = Rtt
+ Src1Reg = MI->getOperand(0).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isDblRegForSubInst(Src2Reg, HRI) &&
+ Hexagon::IntRegsRegClass.contains(Src1Reg) &&
+ HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
+ isShiftedInt<6,3>(MI->getOperand(1).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S4_storeiri_io:
+ // memw(Rs+#u4:2) = #U1
+ Src1Reg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI->getOperand(1).getImm()) &&
+ MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S4_storeirb_io:
+ // memb(Rs+#u4) = #U1
+ Src1Reg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
+ isUInt<4>(MI->getOperand(1).getImm()) && MI->getOperand(2).isImm() &&
+ MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_S2;
+ break;
+ case Hexagon::S2_allocframe:
+ if (MI->getOperand(0).isImm() &&
+ isShiftedUInt<5,3>(MI->getOperand(0).getImm()))
+ return HexagonII::HSIG_S1;
+ break;
+ //
+ // Group A:
+ //
+ // Rx = add(Rx,#s7)
+ // Rd = Rs
+ // Rd = #u6
+ // Rd = #-1
+ // if ([!]P0[.new]) Rd = #0
+ // Rd = add(r29,#u6:2)
+ // Rx = add(Rx,Rs)
+ // P0 = cmp.eq(Rs,#u2)
+ // Rdd = combine(#0,Rs)
+ // Rdd = combine(Rs,#0)
+ // Rdd = combine(#u2,#U2)
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ // Rd = and(Rs,#1)
+ case Hexagon::A2_addi:
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg)) {
+ // Rd = add(r29,#u6:2)
+ if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+ HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() &&
+ isShiftedUInt<6,2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ // Rx = add(Rx,#s7)
+ if ((DstReg == SrcReg) && MI->getOperand(2).isImm() &&
+ isInt<7>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ // Rd = add(Rs,#1)
+ // Rd = add(Rs,#-1)
+ if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+ ((MI->getOperand(2).getImm() == 1) ||
+ (MI->getOperand(2).getImm() == -1)))
+ return HexagonII::HSIG_A;
+ }
+ break;
+ case Hexagon::A2_add:
+ // Rx = add(Rx,Rs)
+ DstReg = MI->getOperand(0).getReg();
+ Src1Reg = MI->getOperand(1).getReg();
+ Src2Reg = MI->getOperand(2).getReg();
+ if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
+ isIntRegForSubInst(Src2Reg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_andir:
+ // Same as zxtb.
+ // Rd16=and(Rs16,#255)
+ // Rd16=and(Rs16,#1)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() &&
+ ((MI->getOperand(2).getImm() == 1) ||
+ (MI->getOperand(2).getImm() == 255)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_tfr:
+ // Rd = Rs
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_tfrsi:
+ // Rd = #u6
+    // Do not test for the #u6 size, since the constant is extended regardless
+    // and a compound could still be formed.
+ // Rd = #-1
+ DstReg = MI->getOperand(0).getReg();
+ if (isIntRegForSubInst(DstReg))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::C2_cmoveit:
+ case Hexagon::C2_cmovenewit:
+ case Hexagon::C2_cmoveif:
+ case Hexagon::C2_cmovenewif:
+ // if ([!]P0[.new]) Rd = #0
+ // Actual form:
+ // %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) &&
+ Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg &&
+ MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0)
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::C2_cmpeqi:
+ // P0 = cmp.eq(Rs,#u2)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+ Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) &&
+ MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm()))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ // Rdd = combine(#u2,#U2)
+ DstReg = MI->getOperand(0).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) &&
+ ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) ||
+ (MI->getOperand(1).isGlobal() &&
+ isUInt<2>(MI->getOperand(1).getOffset()))) &&
+ ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) ||
+ (MI->getOperand(2).isGlobal() &&
+ isUInt<2>(MI->getOperand(2).getOffset()))))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A4_combineri:
+ // Rdd = combine(Rs,#0)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+ ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) ||
+ (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A4_combineir:
+ // Rdd = combine(#0,Rs)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(2).getReg();
+ if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
+ ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) ||
+ (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0)))
+ return HexagonII::HSIG_A;
+ break;
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ // Rd = sxth/sxtb/zxtb/zxth(Rs)
+ DstReg = MI->getOperand(0).getReg();
+ SrcReg = MI->getOperand(1).getReg();
+ if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+ return HexagonII::HSIG_A;
+ break;
+ }
- return false;
+ return HexagonII::HSIG_None;
}
-bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
- if (isExtended) // Instruction must be extended.
- return true;
- unsigned isExtendable =
- (F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
- if (!isExtendable)
- return false;
-
- short ExtOpNum = getCExtOpNum(MI);
- const MachineOperand &MO = MI->getOperand(ExtOpNum);
- // Use MO operand flags to determine if MO
- // has the HMOTF_ConstExtended flag set.
- if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended)
- return true;
- // If this is a Machine BB address we are talking about, and it is
- // not marked as extended, say so.
- if (MO.isMBB())
- return false;
-
- // We could be using an instruction with an extendable immediate and shoehorn
- // a global address into it. If it is a global address it will be constant
- // extended. We do this for COMBINE.
- // We currently only handle isGlobal() because it is the only kind of
- // object we are going to end up with here for now.
- // In the future we probably should add isSymbol(), etc.
- if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
- MO.isJTI() || MO.isCPI())
- return true;
-
- // If the extendable operand is not 'Immediate' type, the instruction should
- // have 'isExtended' flag set.
- assert(MO.isImm() && "Extendable operand must be Immediate type");
+short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real);
+}
- int MinValue = getMinValue(MI);
- int MaxValue = getMaxValue(MI);
- int ImmValue = MO.getImm();
- return (ImmValue < MinValue || ImmValue > MaxValue);
+// Return first non-debug instruction in the basic block.
+MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
+ const {
+ for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) {
+ MachineInstr *MI = &*MII;
+ if (MI->isDebugValue())
+ continue;
+ return MI;
+ }
+ return nullptr;
}
-// Return the number of bytes required to encode the instruction.
-// Hexagon instructions are fixed length, 4 bytes, unless they
-// use a constant extender, which requires another 4 bytes.
-// For debug instructions and prolog labels, return 0.
-unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
- if (MI->isDebugValue() || MI->isPosition())
- return 0;
+unsigned HexagonInstrInfo::getInstrTimingClassLatency(
+ const InstrItineraryData *ItinData, const MachineInstr *MI) const {
+ // Default to one cycle for no itinerary. However, an "empty" itinerary may
+ // still have a MinLatency property, which getStageLatency checks.
+ if (!ItinData)
+ return getInstrLatency(ItinData, MI);
- unsigned Size = MI->getDesc().getSize();
- if (!Size)
- // Assume the default insn size in case it cannot be determined
- // for whatever reason.
- Size = HEXAGON_INSTR_SIZE;
-
- if (isConstExtended(MI) || isExtended(MI))
- Size += HEXAGON_INSTR_SIZE;
-
- return Size;
+  // Get the latency embedded in the itinerary. If we're not using timing-class
+  // latencies, or if we are using BSB scheduling, then restrict the maximum
+  // latency to 1 (that is, either 0 or 1).
+ if (MI->isTransient())
+ return 0;
+ unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass());
+ if (!EnableTimingClassLatency ||
+ MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>().
+ useBSBScheduling())
+ if (Latency > 1)
+ Latency = 1;
+ return Latency;
}
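
A flattened restatement of the latency rule above, with booleans standing in for the EnableTimingClassLatency flag and the subtarget's BSB-scheduling query (sketch only; the no-itinerary fallback is not modeled):

#include <cstdio>

static unsigned effectiveLatency(bool Transient, unsigned ItinLatency,
                                 bool UseTimingClassLatency, bool UseBSB) {
  if (Transient)
    return 0;                                 // no real code, no latency
  if (!UseTimingClassLatency || UseBSB)
    return ItinLatency > 1 ? 1 : ItinLatency; // clamp to at most one cycle
  return ItinLatency;
}

int main() {
  std::printf("%u\n", effectiveLatency(false, 4, false, false)); // 1
  std::printf("%u\n", effectiveLatency(false, 4, true, false));  // 4
  return 0;
}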
-// Returns the opcode to use when converting MI, which is a conditional jump,
-// into a conditional instruction which uses the .new value of the predicate.
-// We also use branch probabilities to add a hint to the jump.
-int
-HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI,
- const
- MachineBranchProbabilityInfo *MBPI) const {
-
- // We assume that block can have at most two successors.
- bool taken = false;
- MachineBasicBlock *Src = MI->getParent();
- MachineOperand *BrTarget = &MI->getOperand(1);
- MachineBasicBlock *Dst = BrTarget->getMBB();
- const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
- if (Prediction >= BranchProbability(1,2))
- taken = true;
+// Inverts the predication logic:
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::getInvertedPredSense(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond.empty())
+ return false;
+ unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm());
+ Cond[0].setImm(Opc);
+ return true;
+}
- switch (MI->getOpcode()) {
- case Hexagon::J2_jumpt:
- return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
- case Hexagon::J2_jumpf:
- return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
- default:
- llvm_unreachable("Unexpected jump instruction.");
- }
-}
-// Returns true if a particular operand is extendable for an instruction.
-bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
- unsigned short OperandNum) const {
- const uint64_t F = MI->getDesc().TSFlags;
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+ int InvPredOpcode;
+ InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
+ : Hexagon::getTruePredOpcode(Opc);
+ if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
+ return InvPredOpcode;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
- == OperandNum;
+ llvm_unreachable("Unexpected predicated instruction");
}
-// Returns Operand Index for the constant extended instruction.
-unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
-}
-// Returns the min value that doesn't need to be extended.
-int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
+// Returns the max value that doesn't need to be extended.
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
& HexagonII::ExtentSignedMask;
@@ -1896,13 +3582,20 @@ int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return -1U << (bits - 1);
+ return ~(-1U << (bits - 1));
else
- return 0;
+ return ~(-1U << bits);
}
-// Returns the max value that doesn't need to be extended.
-int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
+
+unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
+}
+
+
+// Returns the min value that doesn't need to be extended.
+int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
const uint64_t F = MI->getDesc().TSFlags;
unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
& HexagonII::ExtentSignedMask;
@@ -1910,49 +3603,14 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
& HexagonII::ExtentBitsMask;
if (isSigned) // if value is signed
- return ~(-1U << (bits - 1));
+ return -1U << (bits - 1);
else
- return ~(-1U << bits);
+ return 0;
}
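
A worked example of the extent-range arithmetic in getMaxValue/getMinValue above: a signed extent of B bits spans [-(2^(B-1)), 2^(B-1) - 1] and an unsigned extent spans [0, 2^B - 1], which is exactly what the -1U shifts compute. Standalone sketch (the signed minimum relies on the usual two's-complement conversion of the shifted unsigned value back to int):

#include <cstdio>

int main() {
  unsigned bits = 8;
  int SMin = -1U << (bits - 1);    // -128
  int SMax = ~(-1U << (bits - 1)); //  127
  int UMax = ~(-1U << bits);       //  255
  std::printf("signed extent:   [%d, %d]\n", SMin, SMax);
  std::printf("unsigned extent: [0, %d]\n", UMax);
  return 0;
}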
-// Returns true if an instruction can be converted into a non-extended
-// equivalent instruction.
-bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const {
-
- short NonExtOpcode;
- // Check if the instruction has a register form that uses register in place
- // of the extended operand, if so return that as the non-extended form.
- if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
- return true;
-
- if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
- // Check addressing mode and retrieve non-ext equivalent instruction.
-
- switch (getAddrMode(MI)) {
- case HexagonII::Absolute :
- // Load/store with absolute addressing mode can be converted into
- // base+offset mode.
- NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode());
- break;
- case HexagonII::BaseImmOffset :
- // Load/store with base+offset addressing mode can be converted into
- // base+register offset addressing mode. However left shift operand should
- // be set to 0.
- NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
- break;
- default:
- return false;
- }
- if (NonExtOpcode < 0)
- return false;
- return true;
- }
- return false;
-}
// Returns opcode of the non-extended equivalent instruction.
-short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
-
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
// Check if the instruction has a register form that uses register in place
// of the extended operand, if so return that as the non-extended form.
short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
@@ -1963,9 +3621,12 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
// Check addressing mode and retrieve non-ext equivalent instruction.
switch (getAddrMode(MI)) {
case HexagonII::Absolute :
- return Hexagon::getBasedWithImmOffset(MI->getOpcode());
+ return Hexagon::getBaseWithImmOffset(MI->getOpcode());
case HexagonII::BaseImmOffset :
return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ case HexagonII::BaseLongOffset:
+ return Hexagon::getRegShlForm(MI->getOpcode());
+
default:
return -1;
}
@@ -1973,29 +3634,9 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const {
return -1;
}
-bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const {
- return (Opcode == Hexagon::J2_jumpt) ||
- (Opcode == Hexagon::J2_jumpf) ||
- (Opcode == Hexagon::J2_jumptnewpt) ||
- (Opcode == Hexagon::J2_jumpfnewpt) ||
- (Opcode == Hexagon::J2_jumpt) ||
- (Opcode == Hexagon::J2_jumpf);
-}
-
-bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
- if (Cond.empty() || !isPredicated(Cond[0].getImm()))
- return false;
- return !isPredicatedTrue(Cond[0].getImm());
-}
-
-bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const {
- return (Opcode == Hexagon::ENDLOOP0 ||
- Opcode == Hexagon::ENDLOOP1);
-}
bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
- unsigned &PredReg, unsigned &PredRegPos,
- unsigned &PredRegFlags) const {
+ unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const {
if (Cond.empty())
return false;
assert(Cond.size() == 2);
@@ -2014,3 +3655,174 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
return true;
}
+
+short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const {
+ return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo);
+}
+
+
+short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const {
+ return Hexagon::getRegForm(MI->getOpcode());
+}
+
+
+// Return the number of bytes required to encode the instruction.
+// Hexagon instructions are fixed length, 4 bytes, unless they
+// use a constant extender, which requires another 4 bytes.
+// For debug instructions and prolog labels, return 0.
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
+ if (MI->isDebugValue() || MI->isPosition())
+ return 0;
+
+ unsigned Size = MI->getDesc().getSize();
+ if (!Size)
+ // Assume the default insn size in case it cannot be determined
+ // for whatever reason.
+ Size = HEXAGON_INSTR_SIZE;
+
+ if (isConstExtended(MI) || isExtended(MI))
+ Size += HEXAGON_INSTR_SIZE;
+
+  // Try to compute the number of instructions in the asm string.
+ if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ // Count the number of register definitions to find the asm string.
+ unsigned NumDefs = 0;
+ for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ ++NumDefs)
+ assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+
+ assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+ // Disassemble the AsmStr and approximate number of instructions.
+ const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ Size = getInlineAsmLength(AsmStr, *MAI);
+ }
+
+ return Size;
+}
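
The size rule in getSize is simple: every Hexagon instruction occupies 4 bytes, a constant-extended instruction carries one extra 4-byte extender word, and debug values or position labels emit nothing. A tiny standalone sketch of that rule, with kHexagonInstrSize standing in for HEXAGON_INSTR_SIZE:

#include <cstdio>

constexpr unsigned kHexagonInstrSize = 4; // stand-in for HEXAGON_INSTR_SIZE

unsigned encodedSize(bool isDebugOrLabel, bool isConstExtended) {
  if (isDebugOrLabel)
    return 0;                  // debug values and position labels emit nothing
  unsigned Size = kHexagonInstrSize;
  if (isConstExtended)
    Size += kHexagonInstrSize; // one extra immediate-extender word
  return Size;
}

int main() {
  std::printf("plain: %u  extended: %u  debug: %u\n",
              encodedSize(false, false), encodedSize(false, true),
              encodedSize(true, false));
}
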
+
+
+uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+ return (F >> HexagonII::TypePos) & HexagonII::TypeMask;
+}
+
+
+unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const {
+ const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget();
+ const InstrItineraryData &II = *ST.getInstrItineraryData();
+ const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass());
+
+ return IS.getUnits();
+}
+
+
+unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const {
+ const uint64_t F = get(Opcode).TSFlags;
+ return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask;
+}
+
+
+// Calculate size of the basic block without debug instructions.
+unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const {
+ return nonDbgMICount(BB->instr_begin(), BB->instr_end());
+}
+
+
+unsigned HexagonInstrInfo::nonDbgBundleSize(
+ MachineBasicBlock::const_iterator BundleHead) const {
+ assert(BundleHead->isBundle() && "Not a bundle header");
+ auto MII = BundleHead.getInstrIterator();
+ // Skip the bundle header.
+ return nonDbgMICount(++MII, getBundleEnd(BundleHead));
+}
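
nonDbgBundleSize skips the bundle header and counts only the non-debug instructions that follow it. A container-based sketch of the same count, with a toy Insn record standing in for MachineInstr:

#include <algorithm>
#include <cassert>
#include <vector>

struct Insn {
  bool IsDebug;
  bool isDebugValue() const { return IsDebug; }
};

unsigned nonDbgBundleCount(const std::vector<Insn> &Bundle) {
  assert(!Bundle.empty() && "expected at least the bundle header");
  // Element 0 models the bundle header; debug values are not counted.
  return std::count_if(Bundle.begin() + 1, Bundle.end(),
                       [](const Insn &I) { return !I.isDebugValue(); });
}

int main() {
  std::vector<Insn> Bundle = {{false}, {false}, {true}, {false}};
  return nonDbgBundleCount(Bundle) == 2 ? 0 : 1;
}
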
+
+
+/// immediateExtend - Changes the instruction in place to one using an immediate
+/// extender.
+void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+ assert((isExtendable(MI)||isConstExtended(MI)) &&
+ "Instruction must be extendable");
+ // Find which operand is extendable.
+ short ExtOpNum = getCExtOpNum(MI);
+ MachineOperand &MO = MI->getOperand(ExtOpNum);
+ // This needs to be something we understand.
+ assert((MO.isMBB() || MO.isImm()) &&
+ "Branch with unknown extendable field type");
+ // Mark given operand as extended.
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
+}
+
+
+bool HexagonInstrInfo::invertAndChangeJumpTarget(
+ MachineInstr* MI, MachineBasicBlock* NewTarget) const {
+ DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#"
+ << NewTarget->getNumber(); MI->dump(););
+ assert(MI->isBranch());
+ unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode());
+ int TargetPos = MI->getNumOperands() - 1;
+  // In general, the branch target is the last operand, but implicit defs
+  // added at the end might move it to an earlier position.
+ while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB())
+ --TargetPos;
+ assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB());
+ MI->getOperand(TargetPos).setMBB(NewTarget);
+ if (EnableBranchPrediction && isPredicatedNew(MI)) {
+ NewOpcode = reversePrediction(NewOpcode);
+ }
+ MI->setDesc(get(NewOpcode));
+ return true;
+}
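
The backwards scan in invertAndChangeJumpTarget is needed because implicit defs appended after the branch can displace the basic-block operand from the last slot. A standalone sketch of that search over a toy operand list:

#include <cassert>
#include <vector>

struct Operand {
  bool IsMBB; // true for a basic-block operand
};

int findBranchTargetPos(const std::vector<Operand> &Ops) {
  int Pos = static_cast<int>(Ops.size()) - 1;
  // Implicit operands appended after the branch target are skipped by
  // walking backwards to the last basic-block operand.
  while (Pos > -1 && !Ops[Pos].IsMBB)
    --Pos;
  assert(Pos >= 0 && "branch has no basic-block operand");
  return Pos;
}

int main() {
  std::vector<Operand> Ops = {{false}, {true}, {false}}; // target at index 1
  return findBranchTargetPos(Ops) == 1 ? 0 : 1;
}
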
+
+
+void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
+  /* +++ The code below is used to generate the complete set of Hexagon insns +++ */
+ MachineFunction::iterator A = MF.begin();
+ MachineBasicBlock &B = *A;
+ MachineBasicBlock::iterator I = B.begin();
+ MachineInstr *MI = &*I;
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *NewMI;
+
+ for (unsigned insn = TargetOpcode::GENERIC_OP_END+1;
+ insn < Hexagon::INSTRUCTION_LIST_END; ++insn) {
+ NewMI = BuildMI(B, MI, DL, get(insn));
+ DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) <<
+ " Class: " << NewMI->getDesc().getSchedClass());
+ NewMI->eraseFromParent();
+ }
+  /* --- The code above is used to generate the complete set of Hexagon insns --- */
+}
+
+
+// Inverts the predication logic:
+// p -> NotP
+// NotP -> P
+bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const {
+ DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump());
+ MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode())));
+ return true;
+}
+
+
+// Reverse the branch prediction.
+unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const {
+ int PredRevOpcode = -1;
+ if (isPredictedTaken(Opcode))
+ PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode);
+ else
+ PredRevOpcode = Hexagon::takenBranchPrediction(Opcode);
+ assert(PredRevOpcode > 0);
+ return PredRevOpcode;
+}
+
+
+// TODO: Add more rigorous validation.
+bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond)
+ const {
+ return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1));
+}
+
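
validateBranchCond accepts an empty condition (an unconditional branch) or a condition whose leading immediate is followed by at least one more operand. The same check expressed over a toy operand type, as a sketch:

#include <vector>

struct Op {
  bool IsImm;
};

bool validateBranchCond(const std::vector<Op> &Cond) {
  // Empty means an unconditional branch; otherwise the leading immediate
  // (the predicated opcode) must be followed by at least one more operand.
  return Cond.empty() || (Cond[0].IsImm && Cond.size() != 1);
}

int main() {
  bool Ok = validateBranchCond({}) &&
            validateBranchCond({{true}, {false}}) &&
            !validateBranchCond({{true}});
  return Ok ? 0 : 1;
}
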
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index d0b8a46..9530d9f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -1,4 +1,3 @@
-
//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
@@ -28,23 +27,18 @@ namespace llvm {
struct EVT;
class HexagonSubtarget;
+
class HexagonInstrInfo : public HexagonGenInstrInfo {
virtual void anchor();
const HexagonRegisterInfo RI;
- const HexagonSubtarget &Subtarget;
public:
- typedef unsigned Opcode_t;
-
explicit HexagonInstrInfo(HexagonSubtarget &ST);
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
+ /// TargetInstrInfo overrides.
///
- const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
- /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
@@ -52,7 +46,7 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
- /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
@@ -60,50 +54,118 @@ public:
unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
-
+ /// Analyze the branching code at the end of MBB, returning
+ /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+ /// implemented for a target). Upon success, this returns false and returns
+ /// with the following information in various cases:
+ ///
+ /// 1. If this block ends with no branches (it just falls through to its succ)
+ /// just return false, leaving TBB/FBB null.
+ /// 2. If this block ends with only an unconditional branch, it sets TBB to be
+ /// the destination block.
+ /// 3. If this block ends with a conditional branch and it falls through to a
+ /// successor block, it sets TBB to be the branch destination block and a
+ /// list of operands that evaluate the condition. These operands can be
+ /// passed to other TargetInstrInfo methods to create new branches.
+ /// 4. If this block ends with a conditional branch followed by an
+ /// unconditional branch, it returns the 'true' destination in TBB, the
+ /// 'false' destination in FBB, and a list of operands that evaluate the
+ /// condition. These operands can be passed to other TargetInstrInfo
+ /// methods to create new branches.
+ ///
+ /// Note that RemoveBranch and InsertBranch must be implemented to support
+ /// cases where this method returns success.
+ ///
+ /// If AllowModify is true, then this routine is allowed to modify the basic
+ /// block (e.g. delete instructions after the unconditional branch).
+ ///
bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
+ /// Remove the branching code at the end of the specific MBB.
+ /// This is only invoked in cases where AnalyzeBranch returns success. It
+ /// returns the number of instructions that were removed.
unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ /// Insert branch code into the end of the specified MachineBasicBlock.
+ /// The operands to this method are the same as those
+ /// returned by AnalyzeBranch. This is only invoked in cases where
+ /// AnalyzeBranch returns success. It returns the number of instructions
+ /// inserted.
+ ///
+ /// It is also invoked by tail merging to add unconditional branches in
+ /// cases where AnalyzeBranch doesn't apply because there was no original
+ /// branch to analyze. At least this much must be implemented, else tail
+ /// merging needs to be disabled.
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
- bool analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const override;
+ /// Return true if it's profitable to predicate
+ /// instructions with accumulated instruction latency of "NumCycles"
+ /// of the specified basic block, where the probability of the instructions
+ /// being executed is given by Probability, and Confidence is a measure
+ /// of our confidence that it will be properly predicted.
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ BranchProbability Probability) const override;
+
+ /// Second variant of isProfitableToIfCvt. This one
+  /// checks for the case where two basic blocks from the true and false paths
+  /// of an if-then-else (diamond) are predicated on mutually exclusive
+ /// predicates, where the probability of the true path being taken is given
+ /// by Probability, and Confidence is a measure of our confidence that it
+ /// will be properly predicted.
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ BranchProbability Probability) const override;
+
+ /// Return true if it's profitable for if-converter to duplicate instructions
+ /// of specified accumulated instruction latencies in the specified MBB to
+ /// enable if-conversion.
+ /// The probability of the instructions being executed is given by
+ /// Probability, and Confidence is a measure of our confidence that it
+ /// will be properly predicted.
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override;
+ /// Emit instructions to copy a pair of physical registers.
+ ///
+ /// This function should support copies within any legal register class as
+ /// well as any cross-class copies created during instruction selection.
+ ///
+ /// The source and destination registers may overlap, which may require a
+ /// careful implementation when multiple copy instructions are required for
+ /// large registers. See for example the ARM target.
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const override;
+ /// Store the specified register of the given register class to the specified
+ /// stack frame index. The store instruction is to be added to the given
+ /// machine basic block before the specified machine instruction. If isKill
+ /// is true, the register operand is the last use and must be marked kill.
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
+ /// Load the specified register of the given register class from the specified
+ /// stack frame index. The load instruction is to be added to the given
+ /// machine basic block before the specified machine instruction.
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- /// expandPostRAPseudo - This function is called for all pseudo instructions
+ /// This function is called for all pseudo instructions
/// that remain after register allocation. Many pseudo instructions are
/// created to help register allocation. This is the place to convert them
/// into real instructions. The target can edit MI in place, or it can insert
@@ -111,122 +173,228 @@ public:
/// anything was changed.
bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- int FrameIndex) const override;
+ /// Reverses the branch condition of the specified condition list,
+ /// returning false on success and true if it cannot be reversed.
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const override;
- MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- ArrayRef<unsigned> Ops,
- MachineBasicBlock::iterator InsertPt,
- MachineInstr *LoadMI) const override {
- return nullptr;
- }
+ /// Insert a noop into the instruction stream at the specified point.
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
- unsigned createVR(MachineFunction* MF, MVT VT) const;
+ /// Returns true if the instruction is already predicated.
+ bool isPredicated(const MachineInstr *MI) const override;
- bool isBranch(const MachineInstr *MI) const;
- bool isPredicable(MachineInstr *MI) const override;
+ /// Convert the instruction into a predicated instruction.
+ /// It returns true if the operation was successful.
bool PredicateInstruction(MachineInstr *MI,
ArrayRef<MachineOperand> Cond) const override;
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
-
- bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles, unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const override;
+ /// Returns true if the first specified predicate
+ /// subsumes the second, e.g. GE subsumes GT.
+ bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+ ArrayRef<MachineOperand> Pred2) const override;
- bool isPredicated(const MachineInstr *MI) const override;
- bool isPredicated(unsigned Opcode) const;
- bool isPredicatedTrue(const MachineInstr *MI) const;
- bool isPredicatedTrue(unsigned Opcode) const;
- bool isPredicatedNew(const MachineInstr *MI) const;
- bool isPredicatedNew(unsigned Opcode) const;
+ /// If the specified instruction defines any predicate
+ /// or condition code register(s) used for predication, returns true as well
+ /// as the definition predicate(s) by reference.
bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const override;
- bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
- ArrayRef<MachineOperand> Pred2) const override;
- bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ /// Return true if the specified instruction can be predicated.
+ /// By default, this returns true for every instruction with a
+ /// PredicateOperand.
+ bool isPredicable(MachineInstr *MI) const override;
- bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- const BranchProbability &Probability) const override;
+ /// Test if the given instruction should be considered a scheduling boundary.
+ /// This primarily includes labels and terminators.
+ bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
+ /// Measure the specified inline asm to determine an approximation of its
+ /// length.
+ unsigned getInlineAsmLength(const char *Str,
+ const MCAsmInfo &MAI) const override;
+
+ /// Allocate and return a hazard recognizer to use for this target when
+ /// scheduling the machine instructions after register allocation.
+ ScheduleHazardRecognizer*
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+ const ScheduleDAG *DAG) const override;
+
+ /// For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2 if having two register operands, and the value it
+ /// compares against in CmpValue. Return true if the comparison instruction
+ /// can be analyzed.
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
+
+ /// Compute the instruction latency of a given instruction.
+ /// If the instruction has higher cost when predicated, it's returned via
+ /// PredCost.
+ unsigned getInstrLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *MI,
+ unsigned *PredCost = 0) const override;
+
+ /// Create machine specific model for scheduling.
DFAPacketizer *
CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override;
- bool isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const override;
- bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
- bool isValidAutoIncImm(const EVT VT, const int Offset) const;
- bool isMemOp(const MachineInstr *MI) const;
- bool isSpillPredRegOp(const MachineInstr *MI) const;
- bool isU6_3Immediate(const int value) const;
- bool isU6_2Immediate(const int value) const;
- bool isU6_1Immediate(const int value) const;
- bool isU6_0Immediate(const int value) const;
- bool isS4_3Immediate(const int value) const;
- bool isS4_2Immediate(const int value) const;
- bool isS4_1Immediate(const int value) const;
- bool isS4_0Immediate(const int value) const;
- bool isS12_Immediate(const int value) const;
- bool isU6_Immediate(const int value) const;
- bool isS8_Immediate(const int value) const;
- bool isS6_Immediate(const int value) const;
-
- bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const;
- bool isConditionalTransfer(const MachineInstr* MI) const;
+  // Sometimes the target can tell, even without aliasing information,
+  // that two MIs access different memory addresses. Returns true when that
+  // is provably the case and false otherwise.
+ bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb,
+ AliasAnalysis *AA = nullptr)
+ const override;
+
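
For fixed-size accesses with known offsets from a matching base, the disjointness question above reduces to a non-overlap test on byte ranges. A hedged standalone sketch of that interval test (the real override must also prove that the bases match and that both offsets are known):

#include <cstdint>

bool rangesDisjoint(int64_t OffA, uint64_t SizeA,
                    int64_t OffB, uint64_t SizeB) {
  // Two accesses off the same base are disjoint when their
  // [Off, Off + Size) byte ranges do not overlap.
  return OffA + int64_t(SizeA) <= OffB || OffB + int64_t(SizeB) <= OffA;
}

int main() {
  // e.g. memw at base+0 (4 bytes) vs memw at base+4 (4 bytes): disjoint.
  return rangesDisjoint(0, 4, 4, 4) ? 0 : 1;
}
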
+
+ /// HexagonInstrInfo specifics.
+ ///
+
+ const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ bool isAbsoluteSet(const MachineInstr* MI) const;
+ bool isAccumulator(const MachineInstr *MI) const;
+ bool isComplex(const MachineInstr *MI) const;
+ bool isCompoundBranchInstr(const MachineInstr *MI) const;
+ bool isCondInst(const MachineInstr *MI) const;
bool isConditionalALU32 (const MachineInstr* MI) const;
- bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isConditionalLoad(const MachineInstr* MI) const;
bool isConditionalStore(const MachineInstr* MI) const;
- bool isNewValueInst(const MachineInstr* MI) const;
- bool isNewValue(const MachineInstr* MI) const;
- bool isNewValue(Opcode_t Opcode) const;
- bool isDotNewInst(const MachineInstr* MI) const;
- int GetDotOldOp(const int opc) const;
- int GetDotNewOp(const MachineInstr* MI) const;
- int GetDotNewPredOp(MachineInstr *MI,
- const MachineBranchProbabilityInfo
- *MBPI) const;
- bool mayBeNewStore(const MachineInstr* MI) const;
+ bool isConditionalTransfer(const MachineInstr* MI) const;
+ bool isConstExtended(const MachineInstr *MI) const;
bool isDeallocRet(const MachineInstr *MI) const;
- unsigned getInvertedPredicatedOpcode(const int Opc) const;
+ bool isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+ bool isDotCurInst(const MachineInstr* MI) const;
+ bool isDotNewInst(const MachineInstr* MI) const;
+ bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const;
+ bool isEarlySourceInstr(const MachineInstr *MI) const;
+ bool isEndLoopN(unsigned Opcode) const;
+ bool isExpr(unsigned OpType) const;
bool isExtendable(const MachineInstr* MI) const;
bool isExtended(const MachineInstr* MI) const;
- bool isPostIncrement(const MachineInstr* MI) const;
+ bool isFloat(const MachineInstr *MI) const;
+ bool isHVXMemWithAIndirect(const MachineInstr *I,
+ const MachineInstr *J) const;
+ bool isIndirectCall(const MachineInstr *MI) const;
+ bool isIndirectL4Return(const MachineInstr *MI) const;
+ bool isJumpR(const MachineInstr *MI) const;
+ bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const;
+ bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+ const MachineInstr *ESMI) const;
+ bool isLateResultInstr(const MachineInstr *MI) const;
+ bool isLateSourceInstr(const MachineInstr *MI) const;
+ bool isLoopN(const MachineInstr *MI) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isNewValue(const MachineInstr* MI) const;
+ bool isNewValue(unsigned Opcode) const;
+ bool isNewValueInst(const MachineInstr* MI) const;
+ bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueJump(unsigned Opcode) const;
bool isNewValueStore(const MachineInstr* MI) const;
bool isNewValueStore(unsigned Opcode) const;
- bool isNewValueJump(const MachineInstr* MI) const;
- bool isNewValueJump(Opcode_t Opcode) const;
- bool isNewValueJumpCandidate(const MachineInstr *MI) const;
+ bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const;
+ bool isPostIncrement(const MachineInstr* MI) const;
+ bool isPredicatedNew(const MachineInstr *MI) const;
+ bool isPredicatedNew(unsigned Opcode) const;
+ bool isPredicatedTrue(const MachineInstr *MI) const;
+ bool isPredicatedTrue(unsigned Opcode) const;
+ bool isPredicated(unsigned Opcode) const;
+ bool isPredicateLate(unsigned Opcode) const;
+ bool isPredictedTaken(unsigned Opcode) const;
+ bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const;
+ bool isSolo(const MachineInstr* MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isTC1(const MachineInstr *MI) const;
+ bool isTC2(const MachineInstr *MI) const;
+ bool isTC2Early(const MachineInstr *MI) const;
+ bool isTC4x(const MachineInstr *MI) const;
+ bool isV60VectorInstruction(const MachineInstr *MI) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
+ bool isVecAcc(const MachineInstr *MI) const;
+ bool isVecALU(const MachineInstr *MI) const;
+ bool isVecUsableNextPacket(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+
+
+ bool canExecuteInBundle(const MachineInstr *First,
+ const MachineInstr *Second) const;
+ bool hasEHLabel(const MachineBasicBlock *B) const;
+ bool hasNonExtEquivalent(const MachineInstr *MI) const;
+ bool hasPseudoInstrPair(const MachineInstr *MI) const;
+ bool hasUncondBranch(const MachineBasicBlock *B) const;
+ bool mayBeCurLoad(const MachineInstr* MI) const;
+ bool mayBeNewStore(const MachineInstr* MI) const;
+ bool producesStall(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) const;
+ bool producesStall(const MachineInstr *MI,
+ MachineBasicBlock::const_instr_iterator MII) const;
+ bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const;
+ bool PredOpcodeHasJMP_c(unsigned Opcode) const;
+ bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
- void immediateExtend(MachineInstr *MI) const;
- bool isConstExtended(const MachineInstr *MI) const;
- unsigned getSize(const MachineInstr *MI) const;
- int getDotNewPredJumpOp(MachineInstr *MI,
- const MachineBranchProbabilityInfo *MBPI) const;
unsigned getAddrMode(const MachineInstr* MI) const;
- bool isOperandExtended(const MachineInstr *MI,
- unsigned short OperandNum) const;
- unsigned short getCExtOpNum(const MachineInstr *MI) const;
- int getMinValue(const MachineInstr *MI) const;
+ unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset,
+ unsigned &AccessSize) const;
+ bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos,
+ unsigned &OffsetPos) const;
+ SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
+ unsigned getCExtOpNum(const MachineInstr *MI) const;
+ HexagonII::CompoundGroup
+ getCompoundCandidateGroup(const MachineInstr *MI) const;
+ unsigned getCompoundOpcode(const MachineInstr *GA,
+ const MachineInstr *GB) const;
+ int getCondOpcode(int Opc, bool sense) const;
+ int getDotCurOp(const MachineInstr* MI) const;
+ int getDotNewOp(const MachineInstr* MI) const;
+ int getDotNewPredJumpOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const;
+ int getDotNewPredOp(const MachineInstr *MI,
+ const MachineBranchProbabilityInfo *MBPI) const;
+ int getDotOldOp(const int opc) const;
+ HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI)
+ const;
+ short getEquivalentHWInstr(const MachineInstr *MI) const;
+ MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const;
+  /// Compute the instruction latency of a given instruction using timing
+  /// class information, if available.
+  unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData,
+                                      const MachineInstr *MI) const;
+ bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const;
+ unsigned getInvertedPredicatedOpcode(const int Opc) const;
int getMaxValue(const MachineInstr *MI) const;
- bool NonExtEquivalentExists (const MachineInstr *MI) const;
+ unsigned getMemAccessSize(const MachineInstr* MI) const;
+ int getMinValue(const MachineInstr *MI) const;
short getNonExtOpcode(const MachineInstr *MI) const;
- bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
- bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
- bool isEndLoopN(Opcode_t Opcode) const;
bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg,
unsigned &PredRegPos, unsigned &PredRegFlags) const;
- int getCondOpcode(int Opc, bool sense) const;
+ short getPseudoInstrPair(const MachineInstr *MI) const;
+ short getRegForm(const MachineInstr *MI) const;
+ unsigned getSize(const MachineInstr *MI) const;
+ uint64_t getType(const MachineInstr* MI) const;
+ unsigned getUnits(const MachineInstr* MI) const;
+ unsigned getValidSubTargets(const unsigned Opcode) const;
+
+  /// Count the non-debug instructions in a basic block or in a bundle.
+ unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
+ unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
+
+
+ void immediateExtend(MachineInstr *MI) const;
+ bool invertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) const;
+ void genAllInsnTimingClasses(MachineFunction &MF) const;
+ bool reversePredSense(MachineInstr* MI) const;
+ unsigned reversePrediction(unsigned Opcode) const;
+ bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const;
};
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index 3b32c10..5cfeba7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -13,7 +13,7 @@
include "HexagonInstrFormats.td"
include "HexagonOperands.td"
-
+include "HexagonInstrEnc.td"
// Pattern fragment that combines the value type and the register class
// into a single parameter.
// The pat frags in the definitions below need to have a named register,
@@ -1426,9 +1426,6 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
-def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
-
class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
@@ -1606,8 +1603,6 @@ def EH_RETURN_JMPR : T_JMPr;
def: Pat<(eh_return),
(EH_RETURN_JMPR (i32 R31))>;
-def: Pat<(HexagonBR_JT (i32 IntRegs:$dst)),
- (J2_jumpr IntRegs:$dst)>;
def: Pat<(brind (i32 IntRegs:$dst)),
(J2_jumpr IntRegs:$dst)>;
@@ -2825,7 +2820,7 @@ let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
[(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3),
+ (add (add (i32 IntRegs:$src2), s32ImmPred:$src3),
(i32 IntRegs:$src1)))]>, ImmRegRel;
def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
@@ -2859,7 +2854,7 @@ class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s16_16ImmPred>;
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>;
def : T_MType_acc_pat2 <M2_nacci, add, sub>;
//===----------------------------------------------------------------------===//
@@ -3303,7 +3298,8 @@ class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
!if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
/* s4_0Imm */ offset{3-0})));
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
let IClass = 0b1010;
@@ -3322,7 +3318,7 @@ class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
//===----------------------------------------------------------------------===//
let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew >
+ bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew>
: STInst <(outs IntRegs:$_dst_),
(ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
!if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
@@ -3341,7 +3337,8 @@ class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
/* s4_0Imm */ offset{3-0})));
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
let isPredicatedNew = isPredNew;
let isPredicatedFalse = isPredNot;
@@ -3404,7 +3401,6 @@ def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
//===----------------------------------------------------------------------===//
// Template class for post increment stores with register offset.
//===----------------------------------------------------------------------===//
-let isNVStorable = 1 in
class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
MemAccessSize AccessSz, bit isHalf = 0>
: STInst <(outs IntRegs:$_dst_),
@@ -3416,6 +3412,9 @@ class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
bits<5> src3;
let accessSize = AccessSz;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b1010;
let Inst{27-24} = 0b1101;
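
Every isNVStorable change in these store templates encodes the same rule: only plain byte/half/word stores get a new-value (.new) form, while memd and the .h upper-half stores do not. The !if nest expressed as a plain predicate (a C++ sketch, not TableGen):

#include <string>

bool isNVStorable(const std::string &Mnemonic, bool IsHalf) {
  // New-value forms exist only for plain byte/half/word stores: doubleword
  // stores (memd) and upper-half stores (.h) have no .new variant.
  return Mnemonic != "memd" && !IsHalf;
}

int main() {
  bool Ok = isNVStorable("memw", false) &&  // memw Rt   -> has NV form
            !isNVStorable("memh", true) &&  // memh Rt.h -> no NV form
            !isNVStorable("memd", false);   // memd Rtt  -> no NV form
  return Ok ? 0 : 1;
}
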
@@ -3430,12 +3429,11 @@ def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
-
def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3>MajOp, bit isH = 0>
+ bits<3> MajOp, bit isH = 0>
: STInst <(outs),
(ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>,
@@ -3455,6 +3453,8 @@ class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2},
!if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1},
/* s11_0Ext */ src2{10-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
let IClass = 0b1010;
let Inst{27} = 0b0;
@@ -3494,7 +3494,10 @@ class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2},
!if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1},
/* u6_0Ext */ src3{5-0})));
- let IClass = 0b0100;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+
+ let IClass = 0b0100;
let Inst{27} = 0b0;
let Inst{26} = PredNot;
@@ -3508,7 +3511,7 @@ class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
let Inst{1-0} = src1;
}
-let isExtendable = 1, isNVStorable = 1, hasSideEffects = 0 in
+let isExtendable = 1, hasSideEffects = 0 in
multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
@@ -3665,7 +3668,7 @@ def S2_allocframe: ST0Inst <
// S2_storer[bhwdf]_pci: Store byte/half/word/double.
// S2_storer[bhwdf]_pci -> S2_storerbnew_pci
-let Uses = [CS], isNVStorable = 1 in
+let Uses = [CS] in
class T_store_pci <string mnemonic, RegisterClass RC,
Operand Imm, bits<4>MajOp,
MemAccessSize AlignSize, string RegSrc = "Rt">
@@ -3679,6 +3682,8 @@ class T_store_pci <string mnemonic, RegisterClass RC,
bits<1> Mu;
bits<5> Rt;
let accessSize = AlignSize;
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
+ !if(!eq(RegSrc,"Rt.h"), 0, 1));
let IClass = 0b1010;
let Inst{27-25} = 0b100;
@@ -3696,15 +3701,15 @@ class T_store_pci <string mnemonic, RegisterClass RC,
}
def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
- ByteAccess>;
+ ByteAccess>;
def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
- HalfWordAccess>;
+ HalfWordAccess>;
def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011,
- HalfWordAccess, "Rt.h">;
+ HalfWordAccess, "Rt.h">;
def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
- WordAccess>;
+ WordAccess>;
def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
- DoubleWordAccess>;
+ DoubleWordAccess>;
let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in
class T_storenew_pci <string mnemonic, Operand Imm,
@@ -3762,7 +3767,7 @@ def S2_storerd_pci_pseudo : T_store_pci_pseudo <"memd", DoubleRegs>;
//===----------------------------------------------------------------------===//
// Circular stores with auto-increment register
//===----------------------------------------------------------------------===//
-let Uses = [CS], isNVStorable = 1 in
+let Uses = [CS] in
class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
MemAccessSize AlignSize, string RegSrc = "Rt">
: STInst <(outs IntRegs:$_dst_),
@@ -3775,6 +3780,8 @@ class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
bits<5> Rt;
let accessSize = AlignSize;
+ let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
+ !if(!eq(RegSrc,"Rt.h"), 0, 1));
let IClass = 0b1010;
let Inst{27-25} = 0b100;
@@ -5784,7 +5791,19 @@ include "HexagonInstrInfoV5.td"
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// V60 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
// ALU32/64/Vector +
//===----------------------------------------------------------------------===///
include "HexagonInstrInfoVector.td"
+
+include "HexagonInstrAlias.td"
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 65b0f49..87d6b35 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -684,7 +684,7 @@ def: Pat<(i64 (zext (i32 IntRegs:$src1))),
// Template class for store instructions with Absolute set addressing mode.
//===----------------------------------------------------------------------===//
let isExtended = 1, opExtendable = 1, opExtentBits = 6,
- addrMode = AbsoluteSet, isNVStorable = 1 in
+ addrMode = AbsoluteSet in
class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
: STInst<(outs IntRegs:$dst),
@@ -696,6 +696,9 @@ class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
let accessSize = AccessSz;
let BaseOpcode = BaseOp#"_AbsSet";
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b1010;
let Inst{27-24} = 0b1011;
@@ -750,7 +753,7 @@ let mayStore = 1, addrMode = AbsoluteSet in {
}
let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
-addrMode = BaseLongOffset, AddedComplexity = 40 in
+ addrMode = BaseLongOffset, AddedComplexity = 40 in
class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
: STInst<(outs),
@@ -766,6 +769,10 @@ class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
let accessSize = AccessSz;
let CextOpcode = CextOp;
let BaseOpcode = CextOp#"_shl";
+
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b1010;
let Inst{27-24} =0b1101;
@@ -856,6 +863,9 @@ class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH>
bits<2> u2;
bits<5> Rt;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
+
let IClass = 0b0011;
let Inst{27-24} = 0b1011;
@@ -888,6 +898,8 @@ class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
let isPredicatedFalse = isNot;
let isPredicatedNew = isPredNew;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
let IClass = 0b0011;
@@ -1826,43 +1838,22 @@ def: LogLogNot_pat<or, or, C4_or_orn>;
// below are needed to support code generation for PIC
//===----------------------------------------------------------------------===//
-def SDT_HexagonPICAdd
+def SDT_HexagonAtGot
+ : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def SDT_HexagonAtPcrel
: SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def SDT_HexagonGOTAdd
- : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
-def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
-def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
-
-def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>;
-def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>;
-def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL",
- SDT_HexagonGOTAddInternal>;
-def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL",
- SDT_HexagonGOTAddInternalJT>;
-def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL",
- SDT_HexagonGOTAddInternalBA>;
-
-// PIC: Map from a block address computation to a PC-relative add
-def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1),
- (C4_addipc u32ImmPred:$src1)>;
-
-// PIC: Map from the computation to generate a GOT pointer to a PC-relative add
-def: Pat<(Hexagonpic_add texternalsym:$src1),
- (C4_addipc u32ImmPred:$src1)>;
-// PIC: Map from a jump table address computation to a PC-relative add
-def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1),
- (C4_addipc u32ImmPred:$src1)>;
+// AT_GOT address-of-GOT, address-of-global, offset-in-global
+def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
+// AT_PCREL address-of-global
+def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
-// PIC: Map from a GOT-relative symbol reference to a load
-def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2),
- (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>;
-
-// PIC: Map from a static symbol reference to a PC-relative add
-def: Pat<(Hexagongat_pcrel tglobaladdr:$src1),
- (C4_addipc u32ImmPred:$src1)>;
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
+ (L2_loadri_io I32:$got, imm:$addr)>;
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
+ (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
+def: Pat<(HexagonAtPcrel I32:$addr),
+ (C4_addipc imm:$addr)>;
//===----------------------------------------------------------------------===//
// CR -
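
The reworked AT_GOT node carries the GOT pointer, the GOT slot address, and an offset into the global, and the patterns above lower it to a single GOT load plus an A2_addi only when the offset is nonzero. A scalar model of that decomposition, with the hypothetical helper loadWord standing in for L2_loadri_io:

#include <cstdint>

// Models the L2_loadri_io word load used by the patterns above.
static uint32_t loadWord(const uint32_t *Base, uint32_t ByteOff) {
  return Base[ByteOff / 4];
}

// AT_GOT(got, addr, off): one GOT load, plus an A2_addi-style add only when
// a nonzero constant offset into the global is present.
uint32_t lowerAtGot(const uint32_t *Got, uint32_t Addr, int32_t Off) {
  uint32_t Global = loadWord(Got, Addr);
  return Off == 0 ? Global : Global + uint32_t(Off);
}

int main() {
  uint32_t Got[2] = {0x1000, 0x2000}; // toy GOT holding two global addresses
  bool Ok = lowerAtGot(Got, 4, 0) == 0x2000 &&  // no offset: just the load
            lowerAtGot(Got, 0, 8) == 0x1008;    // offset folded into an add
  return Ok ? 0 : 1;
}
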
@@ -1903,7 +1894,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
"$Rd = add($Rs, add($Ru, #$s6))" ,
[(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))],
+ (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -3290,27 +3281,33 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">;
}
// Restore registers and dealloc frame before a tail call.
let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel;
}
// Save registers function call.
let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
+ let isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel;
}
//===----------------------------------------------------------------------===//
// Template class for non predicated store instructions with
// GP-Relative or absolute addressing.
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicable = 1, isNVStorable = 1 in
+let hasSideEffects = 0, isPredicable = 1 in
class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<2>MajOp, Operand AddrOp, bit isAbs, bit isHalf>
- : STInst<(outs), (ins AddrOp:$addr, RC:$src),
- mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src"#!if(isHalf, ".h",""),
+ bits<2>MajOp, bit isAbs, bit isHalf>
+ : STInst<(outs), (ins ImmOp:$addr, RC:$src),
+ mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""),
[], "", V2LDST_tc_st_SLOT01> {
bits<19> addr;
bits<5> src;
@@ -3321,6 +3318,9 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
!if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
!if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
/* u16_0Imm */ addr{15-0})));
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+
let IClass = 0b0100;
let Inst{27} = 1;
let Inst{26-25} = offsetBits{15-14};
@@ -3337,11 +3337,10 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
// Template class for predicated store instructions with
// GP-Relative or absolute addressing.
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicated = 1, isNVStorable = 1, opExtentBits = 6,
- opExtendable = 1 in
+let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in
class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
bit isHalf, bit isNot, bit isNew>
- : STInst<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, RC: $src2),
+ : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
[], "", ST_tc_st_SLOT01>, AddrModeRel {
@@ -3351,6 +3350,8 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
let isPredicatedNew = isNew;
let isPredicatedFalse = isNot;
+ // Store upper-half and store doubleword cannot be NV.
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
let IClass = 0b1010;
@@ -3371,7 +3372,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
//===----------------------------------------------------------------------===//
class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1, isHalf>,
+ : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>,
AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3538,7 +3539,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
let isAsmParserOnly = 1 in
class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0, isHalf> {
+ : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> {
// Set BaseOpcode same as absolute addressing instructions so that
// non-predicated GP-Rel instructions can have relate with predicated
// Absolute instruction.
@@ -3553,7 +3554,7 @@ multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
// Absolute instruction.
let BaseOpcode = BaseOp#_abs in {
def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp,
- globaladdress, 0, isHalf>;
+ 0, isHalf>;
// New-value store
def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp, 0> ;
}
@@ -3615,9 +3616,9 @@ let AddedComplexity = 100 in {
//===----------------------------------------------------------------------===//
let isPredicable = 1, hasSideEffects = 0 in
class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp, Operand AddrOp, bit isAbs>
- : LDInst <(outs RC:$dst), (ins AddrOp:$addr),
- "$dst = "#mnemonic# !if(isAbs, "(##", "(#")#"$addr)",
+ bits<3> MajOp>
+ : LDInst <(outs RC:$dst), (ins ImmOp:$addr),
+ "$dst = "#mnemonic# "(#$addr)",
[], "", V2LDST_tc_ld_SLOT01> {
bits<5> dst;
bits<19> addr;
@@ -3642,7 +3643,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, u32Imm, 1>, AddrModeRel {
+ : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3660,10 +3661,11 @@ class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
// Template class for predicated load instructions with
// absolute addressing mode.
//===----------------------------------------------------------------------===//
-let isPredicated = 1, opExtentBits = 6, opExtendable = 2 in
+let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6,
+ opExtendable = 2 in
class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
bit isPredNot, bit isPredNew>
- : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr),
+ : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr),
!if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel {
bits<5> dst;
@@ -3737,7 +3739,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
let isAsmParserOnly = 1 in
class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp, globaladdress, 0>, PredNewRel {
+ : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel {
let BaseOpcode = BaseOp#_abs;
}
@@ -3841,26 +3843,6 @@ let AddedComplexity = 100 in {
def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>;
}
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-let AddedComplexity = 100 in
-def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
- (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>;
-
-// Transfer global address into a register
-let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1,
-isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
-def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
- "$dst = #$src1",
- [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>;
-
-// Transfer a block address into a register
-def : Pat<(HexagonCONST32_GP tblockaddress:$src1),
- (TFRI_V4 tblockaddress:$src1)>;
-
-let AddedComplexity = 50 in
-def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
- (TFRI_V4 tglobaladdr:$src1)>;
-
// i8/i16/i32 -> i64 loads
// We need a complexity of 120 here to override preceding handling of
// zextload.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 337f4ea..823961f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -98,21 +98,21 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
// HexagonInstrInfo.td patterns.
let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1,
isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, isPseudo = 1 in
def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1),
"$dst = #$src1",
[(set F32:$dst, fpimm:$src1)]>,
Requires<[HasV5T]>;
-let isExtended = 1, opExtendable = 2, isPredicated = 1,
- hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in
+let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0,
+ validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in
def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
"if ($src1) $dst = #$src2", []>,
Requires<[HasV5T]>;
-let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1,
- isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in
+let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1,
+ hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in
def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2),
"if (!$src1) $dst = #$src2", []>,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
new file mode 100644
index 0000000..897ada0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
@@ -0,0 +1,2241 @@
+//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Vector store
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "", InstrItinClass itin = CVI_VM_ST,
+              IType type = TypeCVI_VM_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon;
+
+// Vector load
+let Predicates = [HasV60T, UseHVX] in
+let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+ class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_LD,
+ IType type = TypeCVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let Predicates = [HasV60T, UseHVX] in
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_ST,
+ IType type = TypeCVI_VM_ST>
+: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+//===----------------------------------------------------------------------===//
+// Vector loads with base + immediate offset
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, accessSize = Vector64Access in
+class T_vload_ai<string asmStr>
+ : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2),
+ asmStr>;
+
+let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in
+class T_vload_ai_128B<string asmStr>
+ : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2),
+ asmStr>;
+
+let isCVLoadable = 1, hasNewValue = 1 in {
+ def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">,
+ V6_vL32b_ai_enc;
+ def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_ai_enc;
+ // 128B
+ def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">,
+ V6_vL32b_ai_128B_enc;
+ def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in {
+ def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">,
+ V6_vL32Ub_ai_enc;
+ def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">,
+ V6_vL32Ub_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1,
+ hasNewValue = 1 in {
+ def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">,
+ V6_vL32b_cur_ai_enc;
+ def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_cur_ai_enc;
+ // 128B
+ def V6_vL32b_cur_ai_128B : T_vload_ai_128B
+ <"$dst.cur = vmem($src1+#$src2)">,
+ V6_vL32b_cur_ai_128B_enc;
+ def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B
+ <"$dst.cur = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_cur_ai_128B_enc;
+}
+
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in {
+ def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">,
+ V6_vL32b_tmp_ai_enc;
+ def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_tmp_ai_enc;
+ // 128B
+ def V6_vL32b_tmp_ai_128B : T_vload_ai_128B
+ <"$dst.tmp = vmem($src1+#$src2)">,
+ V6_vL32b_tmp_ai_128B_enc;
+ def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B
+                                         <"$dst.tmp = vmem($src1+#$src2):nt">,
+ V6_vL32b_nt_tmp_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - unconditional
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, accessSize = Vector64Access in
+class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isNT>
+ : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">,
+ V6_vS32b_ai_enc;
+ def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">,
+ V6_vS32b_ai_128B_enc;
+}
+
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_nt_ai_enc;
+ def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_nt_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+  def V6_vS32Ub_ai     : T_vstore_ai_64B <"vmemu", "vS32Ub_ai">,
+                           V6_vS32Ub_ai_enc;
+  def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vS32Ub_ai">,
+                           V6_vS32Ub_ai_128B_enc;
+}
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - unconditional new
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1,
+ Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in
+class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
+ : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_ai_64B <string baseOp, bit isNT = 0>
+ : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_ai_128B <string baseOp, bit isNT = 0>
+ : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
+
+def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc;
+def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">,
+ V6_vS32b_new_ai_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>,
+ V6_vS32b_nt_new_ai_enc;
+ def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>,
+ V6_vS32b_nt_new_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - conditional
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isPredicated = 1 in
+class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "
+ #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_pred_ai_64B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pred_ai_128B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">,
+ V6_vS32b_pred_ai_enc;
+ def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_npred_ai_enc;
+ // 128B
+ def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">,
+ V6_vS32b_pred_ai_128B_enc;
+ def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>,
+ V6_vS32b_npred_ai_128B_enc;
+}
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>,
+ V6_vS32b_nt_pred_ai_enc;
+ def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>,
+ V6_vS32b_nt_npred_ai_enc;
+ // 128B
+ def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B
+ <"vmem", "vS32b_ai", 0, 1>,
+ V6_vS32b_nt_pred_ai_128B_enc;
+ def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B
+ <"vmem", "vS32b_ai", 1, 1>,
+ V6_vS32b_nt_npred_ai_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_pred_ai_enc;
+ def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>,
+ V6_vS32Ub_npred_ai_enc;
+ // 128B
+ def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">,
+ V6_vS32Ub_pred_ai_128B_enc;
+ def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>,
+ V6_vS32Ub_npred_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset in
+class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC,
+ bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>;
+
+def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc;
+def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>,
+ V6_vS32b_nqpred_ai_enc;
+def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>,
+ V6_vS32b_nt_qpred_ai_enc;
+def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>,
+ V6_vS32b_nt_nqpred_ai_enc;
+// 128B
+def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc;
+def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>,
+ V6_vS32b_nqpred_ai_128B_enc;
+def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>,
+ V6_vS32b_nt_qpred_ai_128B_enc;
+def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>,
+ V6_vS32b_nt_nqpred_ai_128B_enc;
+
+
+//===----------------------------------------------------------------------===//
+// Vector stores with base + immediate offset - conditional new
+//===----------------------------------------------------------------------===//
+let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3,
+ isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in
+class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC,
+ bit isPredNot, bit isNT>
+ : V6_STInst <(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+
+def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">,
+ V6_vS32b_new_pred_ai_enc;
+def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>,
+ V6_vS32b_new_npred_ai_enc;
+// 128B
+def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">,
+ V6_vS32b_new_pred_ai_128B_enc;
+def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>,
+ V6_vS32b_new_npred_ai_128B_enc;
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>,
+ V6_vS32b_nt_new_pred_ai_enc;
+ def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>,
+ V6_vS32b_nt_new_npred_ai_enc;
+ // 128B
+ def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B
+ <"vS32b_ai", 0, 1>,
+ V6_vS32b_nt_new_pred_ai_128B_enc;
+ def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B
+ <"vS32b_ai", 1, 1>,
+ V6_vS32b_nt_new_npred_ai_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc, hasNewValue = 1 in
+class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC>
+ : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2), asmStr, [],
+ "$src1 = $_dst_">;
+
+let accessSize = Vector64Access in
+class T_vload_pi_64B <string asmStr>
+ : T_vload_pi <asmStr, s3_6Imm, VectorRegs>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vload_pi_128B <string asmStr>
+ : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>;
+
+let isCVLoadable = 1 in {
+ def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">,
+ V6_vL32b_pi_enc;
+ def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_pi_enc;
+ // 128B
+ def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">,
+ V6_vL32b_pi_128B_enc;
+ def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in {
+ def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">,
+ V6_vL32Ub_pi_enc;
+ // 128B
+ def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">,
+ V6_vL32Ub_pi_128B_enc;
+}
+
+let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in {
+ def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">,
+ V6_vL32b_cur_pi_enc;
+ def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_cur_pi_enc;
+ // 128B
+ def V6_vL32b_cur_pi_128B : T_vload_pi_128B
+ <"$dst.cur = vmem($src1++#$src2)">,
+ V6_vL32b_cur_pi_128B_enc;
+ def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B
+ <"$dst.cur = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_cur_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+ def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">,
+ V6_vL32b_tmp_pi_enc;
+ def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_tmp_pi_enc;
+  // 128B
+ def V6_vL32b_tmp_pi_128B : T_vload_pi_128B
+ <"$dst.tmp = vmem($src1++#$src2)">,
+ V6_vL32b_tmp_pi_128B_enc;
+ def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B
+ <"$dst.tmp = vmem($src1++#$src2):nt">,
+ V6_vL32b_nt_tmp_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc in
+class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let accessSize = Vector64Access in
+class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0>
+ : T_vstore_pi <mnemonic, baseOp, s3_7Imm, VectorRegs128B, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc;
+ def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">,
+ V6_vS32b_pi_128B_enc;
+}
+
+let isNVStorable = 1 , isNonTemporal = 1 in {
+ def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_nt_pi_enc;
+ def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_nt_pi_128B_enc;
+}
+
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pi_enc;
+ def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment unconditional .new vector stores with immediate offset.
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc, isNVStore = 1 in
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+ opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+ "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+ "$src1 = $_dst_">, NewValueRel {
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pi_64B <string baseOp, bit isNT = 0>
+ : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pi_128B <string baseOp, bit isNT = 0>
+ : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
+
+
+def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">,
+ V6_vS32b_new_pi_enc;
+def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">,
+ V6_vS32b_new_pi_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>,
+ V6_vS32b_nt_new_pi_enc;
+ def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>,
+ V6_vS32b_nt_new_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1, addrMode = PostInc in
+class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp,
+ RegisterClass RC, bit isPredNot, bit isNT>
+ : V6_STInst<(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_pred_pi_64B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_pred_pi_128B <string mnemonic, string baseOp,
+ bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+let isNVStorable = 1 in {
+ def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">,
+ V6_vS32b_pred_pi_enc;
+ def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_npred_pi_enc;
+ // 128B
+ def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">,
+ V6_vS32b_pred_pi_128B_enc;
+ def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>,
+ V6_vS32b_npred_pi_128B_enc;
+}
+let isNVStorable = 1, isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>,
+ V6_vS32b_nt_pred_pi_enc;
+ def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>,
+ V6_vS32b_nt_npred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B
+ <"vmem", "vS32b_pi", 0, 1>,
+ V6_vS32b_nt_pred_pi_128B_enc;
+ def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B
+ <"vmem", "vS32b_pi", 1, 1>,
+ V6_vS32b_nt_npred_pi_128B_enc;
+}
+
+let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pred_pi_enc;
+ def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>,
+ V6_vS32Ub_npred_pi_enc;
+ // 128B
+ def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">,
+ V6_vS32Ub_pred_pi_128B_enc;
+ def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>,
+ V6_vS32Ub_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with immediate offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+let addrMode = PostInc in
+class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0,
+ bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">;
+
+let accessSize = Vector64Access in
+class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>;
+
+def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc;
+def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc;
+// 128B
+def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B,
+ V6_vS32b_qpred_pi_128B_enc;
+def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>,
+ V6_vS32b_nqpred_pi_128B_enc;
+
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>,
+ V6_vS32b_nt_qpred_pi_enc;
+ def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>,
+ V6_vS32b_nt_nqpred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>,
+ V6_vS32b_nt_qpred_pi_128B_enc;
+ def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>,
+ V6_vS32b_nt_nqpred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+ isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in
+class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC,
+ bit isPredNot, bit isNT>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new", [],
+ "$src2 = $_dst_"> , NewValueRel {
+ let isPredicatedFalse = isPredNot;
+ let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+ : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B,
+ isPredNot, isNT>;
+
+def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">,
+ V6_vS32b_new_pred_pi_enc;
+def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>,
+ V6_vS32b_new_npred_pi_enc;
+// 128B
+def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">,
+ V6_vS32b_new_pred_pi_128B_enc;
+def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>,
+ V6_vS32b_new_npred_pi_128B_enc;
+let isNonTemporal = 1 in {
+ def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>,
+ V6_vS32b_nt_new_pred_pi_enc;
+ def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>,
+ V6_vS32b_nt_new_npred_pi_enc;
+ // 128B
+ def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B
+ <"vS32b_pi", 0, 1>,
+ V6_vS32b_nt_new_pred_pi_128B_enc;
+ def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B
+ <"vS32b_pi", 1, 1>,
+ V6_vS32b_nt_new_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with register offset
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1 in
+class T_vload_ppu<string asmStr>
+ : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2), asmStr, [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let isCVLoadable = 1 in {
+ def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">,
+ V6_vL32b_ppu_enc;
+ def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_ppu_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in
+def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">,
+ V6_vL32Ub_ppu_enc;
+
+let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
+ def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">,
+ V6_vL32b_cur_ppu_enc;
+ def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_cur_ppu_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+ def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">,
+ V6_vL32b_tmp_ppu_enc;
+ def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">,
+ V6_vL32b_nt_tmp_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+ mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+ V6_vS32b_ppu_enc;
+ let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
+ def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+ V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
+ opNewValue = 3, isNVStore = 1 in
+class T_vstore_new_ppu <bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+ "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
+ "$src1 = $_dst_">, NewValueRel;
+
+let BaseOpcode = "vS32b_ppu" in
+def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
+def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional vector stores with register offset
+//===----------------------------------------------------------------------===//
+let isPredicated = 1 in
+class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst<(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
+ def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
+}
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+ def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
+ V6_vS32b_nt_pred_ppu_enc;
+ def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
+ V6_vS32b_nt_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
+ Type = TypeCVI_VM_STU in {
+ def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
+ V6_vS32Ub_pred_ppu_enc;
+ def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
+ V6_vS32Ub_npred_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset - byte-enabled aligned
+//===----------------------------------------------------------------------===//
+class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4", [],
+ "$src2 = $_dst_">, NewValueRel;
+
+def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc;
+def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc;
+def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>,
+ V6_vS32b_nt_qpred_ppu_enc;
+def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>,
+ V6_vS32b_nt_nqpred_ppu_enc;
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with register offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+ isNewValue = 1, opNewValue = 4, isNVStore = 1 in
+class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0>
+ : V6_STInst <(outs IntRegs:$_dst_),
+ (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
+ "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
+ #!if(isNT, ":nt", "")#" = $src4.new", [],
+ "$src2 = $_dst_">, NewValueRel {
+ let isPredicatedFalse = isPredNot;
+}
+
+let BaseOpcode = "vS32b_ppu" in {
+ def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu,
+ V6_vS32b_new_pred_ppu_enc;
+ def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>,
+ V6_vS32b_new_npred_ppu_enc;
+}
+
+let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
+def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>,
+ V6_vS32b_nt_new_pred_ppu_enc;
+def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
+ V6_vS32b_nt_new_npred_ppu_enc;
+}
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>:
+ VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
+ #mnemonic#"($addr+#$off) = $src", []>;
+
+def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
+ Requires<[HasV60T, UseHVXSgl]>;
+def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
+ Requires<[HasV60T, UseHVXDbl]>;
+
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (STrivv_indexed_128B IntRegs:$addr, #0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
+
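+// Select aligned stores of the HVX vector-pair types through the STrivv
+// pseudos: the first type of each defm is the 64-byte (single) mode pair
+// type, the second its 128-byte (double) mode counterpart.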
+defm : STrivv_pats <v128i8, v256i8>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v16i64, v32i64>;
+
+
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned stores
+ def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Aligned stores
+ def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+
+ // Fold Add R+IFF into vector store.
+ let AddedComplexity = 10 in
+ def : Pat<(store (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, s4_6ImmPred:$offset)),
+ (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // Fold Add R+IFF into vector store 128B.
+ let AddedComplexity = 10 in
+ def : Pat<(store (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, s4_7ImmPred:$offset)),
+ (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
+
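+// Select aligned stores of the single HVX vector types through vS32b_ai; the
+// AddedComplexity = 10 patterns in the multiclass also fold a
+// register+immediate address into the base+offset form when the offset fits
+// the scaled immediate predicate.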
+defm : vS32b_ai_pats <v64i8, v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64, v16i64>;
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>
+ : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
+ "$dst="#mnemonic#"($addr+#$off)",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
+def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
+
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat < (VTSgl (load IntRegs:$addr)),
+ (LDrivv_indexed IntRegs:$addr, #0) >,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat < (VTDbl (load IntRegs:$addr)),
+ (LDrivv_indexed_128B IntRegs:$addr, #0) >,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : LDrivv_pats <v128i8, v256i8>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v16i64, v32i64>;
+
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned loads
+ def : Pat < (VTSgl (load IntRegs:$addr)),
+ (V6_vL32b_ai IntRegs:$addr, #0) >,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Load
+ def : Pat < (VTDbl (load IntRegs:$addr)),
+ (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
+ Requires<[UseHVXDbl]>;
+
+ // Fold Add R+IFF into vector load.
+ let AddedComplexity = 10 in
+ def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))),
+ (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ Requires<[UseHVXDbl]>;
+
+ let AddedComplexity = 10 in
+ def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))),
+ (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ Requires<[UseHVXSgl]>;
+}
+
+defm : vL32b_ai_pats <v64i8, v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64, v16i64>;
+
+// Store vector predicate pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriq_pred_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_vec_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+def STriq_pred_vec_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
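+// The ".error" asm strings mark these (and the load/store pseudos below) as
+// compiler-internal place-holders; they must be expanded before any assembly
+// is emitted.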
+
+// Load vector predicate pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Store vector pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriv_pseudo_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def STriv_pseudo_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STrivv_pseudo_V6 : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def STrivv_pseudo_V6_128B : STInst<(outs),
+ (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst),
+ (ins IntRegs:$base, s32Imm:$offset),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VA_DV,
+ IType type = TypeCVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst),
+ (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst),
+ (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3),
+ ".error \"should not emit\" ",
+ []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+}
+
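+// Select a v16i32 vector select on a scalar SETEQ compare into C2_cmpeq
+// feeding the VSelectPseudo_V6 defined above.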
+def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs),
+ (v16i32 VectorRegs:$tval),
+ (v16i32 VectorRegs:$fval), SETEQ)),
+ (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs),
+ (i32 IntRegs:$rhs))),
+ (v16i32 VectorRegs:$tval),
+ (v16i32 VectorRegs:$fval)))>;
+
+
+let hasNewValue = 1 in
+class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
+ asmString >;
+
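+// Each T_vmpy-based defm creates the 64-byte instruction plus an
+// isCodeGenOnly _128B twin that uses the corresponding *128B register
+// classes.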
+multiclass T_vmpy <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_vmpy <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_vmpy_VV <string asmString>:
+ T_vmpy <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_vmpy_WW <string asmString>:
+ T_vmpy <asmString, VecDblRegs, VecDblRegs>;
+
+multiclass T_vmpy_VW <string asmString>:
+ T_vmpy <asmString, VectorRegs, VecDblRegs>;
+
+multiclass T_vmpy_WV <string asmString>:
+ T_vmpy <asmString, VecDblRegs, VectorRegs>;
+
+defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc;
+defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc;
+defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc;
+defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc;
+defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc;
+defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc;
+defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc;
+defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc;
+defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc;
+defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc;
+defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc;
+
+let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in
+defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc;
+
+defm V6_vdmpybus_dv :
+ T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc;
+defm V6_vdmpyhsusat :
+ T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc;
+defm V6_vdmpyhsuisat :
+ T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc;
+defm V6_vdmpyhsat :
+ T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc;
+defm V6_vdmpyhisat :
+ T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc;
+defm V6_vdmpyhb_dv :
+ T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc;
+defm V6_vmpyhss :
+ T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc;
+defm V6_vmpyhsrs :
+ T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in
+defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc;
+
+let Itinerary = CVI_VX, Type = TypeCVI_VX in {
+defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc;
+defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc;
+defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc;
+defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc;
+defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc;
+defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc;
+defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc;
+defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc;
+defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc;
+defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc;
+}
+
+let hasNewValue = 1 in
+class T_HVX_alu <string asmString, InstrItinClass itin,
+ RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
+ asmString >{
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
+multiclass T_HVX_alu <string asmString, RegisterClass RCout,
+ RegisterClass RCin, InstrItinClass itin> {
+ def NAME : T_HVX_alu <asmString, itin, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_alu_VV <string asmString>:
+ T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>;
+
+multiclass T_HVX_alu_WW <string asmString>:
+ T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>;
+
+multiclass T_HVX_alu_WV <string asmString>:
+ T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>;
+
+
+let Itinerary = CVI_VX, Type = TypeCVI_VX in {
+defm V6_vrmpyubv :
+ T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc;
+defm V6_vrmpybv :
+ T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc;
+defm V6_vrmpybusv :
+ T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc;
+defm V6_vabsdiffub :
+ T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc;
+defm V6_vabsdiffh :
+ T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc;
+defm V6_vabsdiffuh :
+ T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc;
+defm V6_vabsdiffw :
+ T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc;
+}
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vdmpyhvsat :
+ T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc;
+defm V6_vmpyhvsrs :
+ T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc;
+defm V6_vmpyih :
+ T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc;
+}
+
+defm V6_vand :
+ T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc;
+defm V6_vor :
+ T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc;
+defm V6_vxor :
+ T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc;
+defm V6_vaddw :
+ T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc;
+defm V6_vaddubsat :
+ T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc;
+defm V6_vadduhsat :
+ T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc;
+defm V6_vaddhsat :
+ T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc;
+defm V6_vaddwsat :
+ T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc;
+defm V6_vsubb :
+ T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc;
+defm V6_vsubh :
+ T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc;
+defm V6_vsubw :
+ T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc;
+defm V6_vsububsat :
+ T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc;
+defm V6_vsubuhsat :
+ T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc;
+defm V6_vsubhsat :
+ T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc;
+defm V6_vsubwsat :
+ T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc;
+defm V6_vavgub :
+ T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc;
+defm V6_vavguh :
+ T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc;
+defm V6_vavgh :
+ T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc;
+defm V6_vavgw :
+ T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc;
+defm V6_vnavgub :
+ T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc;
+defm V6_vnavgh :
+ T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc;
+defm V6_vnavgw :
+ T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc;
+defm V6_vavgubrnd :
+ T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc;
+defm V6_vavguhrnd :
+ T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc;
+defm V6_vavghrnd :
+ T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc;
+defm V6_vavgwrnd :
+ T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc;
+
+defm V6_vmpybv :
+ T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc;
+defm V6_vmpyubv :
+ T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc;
+defm V6_vmpybusv :
+ T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc;
+defm V6_vmpyhv :
+ T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc;
+defm V6_vmpyuhv :
+ T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc;
+defm V6_vmpyhus :
+ T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc;
+defm V6_vaddubh :
+ T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc;
+defm V6_vadduhw :
+ T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc;
+defm V6_vaddhw :
+ T_HVX_alu_WV <"$dst.w = vadd($src1.h,$src2.h)">, V6_vaddhw_enc;
+defm V6_vsububh :
+ T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc;
+defm V6_vsubuhw :
+ T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc;
+defm V6_vsubhw :
+ T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc;
+
+defm V6_vaddb_dv :
+ T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc;
+defm V6_vaddh_dv :
+ T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc;
+defm V6_vaddw_dv :
+ T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc;
+defm V6_vaddubsat_dv :
+ T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc;
+defm V6_vadduhsat_dv :
+ T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc;
+defm V6_vaddhsat_dv :
+ T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc;
+defm V6_vaddwsat_dv :
+ T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc;
+defm V6_vsubb_dv :
+ T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc;
+defm V6_vsubh_dv :
+ T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc;
+defm V6_vsubw_dv :
+ T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc;
+defm V6_vsububsat_dv :
+ T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc;
+defm V6_vsubuhsat_dv :
+ T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc;
+defm V6_vsubhsat_dv :
+ T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc;
+defm V6_vsubwsat_dv :
+ T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc;
+
+let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in {
+defm V6_vmpabusv :
+ T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc;
+defm V6_vmpabuuv :
+ T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc;
+}
+
+let isAccumulator = 1, hasNewValue = 1 in
+class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout,
+ RegisterClass RCin1, RegisterClass RCin2>
+ : CVI_VA_Resource1 <(outs RCout:$dst),
+ (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
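+// T_HVX_vmpyacc_both also generates the _128B variant; a scalar IntRegs
+// operand is kept as IntRegs instead of being rewritten to a "128B" class.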
+multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout,
+ RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > {
+ def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpyacc <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin1#"128B"),
+ !cast<RegisterClass>(RCin2#
+ !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>;
+}
+
+multiclass T_HVX_vmpyacc_VVR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>;
+
+multiclass T_HVX_vmpyacc_VWR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WVR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WWR <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_VVV <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
+
+multiclass T_HVX_vmpyacc_WVV <string asmString>:
+ T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
+
+
+defm V6_vtmpyb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.b,$src2.b)">,
+ V6_vtmpyb_acc_enc;
+defm V6_vtmpybus_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">,
+ V6_vtmpybus_acc_enc;
+defm V6_vtmpyhb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">,
+ V6_vtmpyhb_acc_enc;
+defm V6_vdmpyhb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">,
+ V6_vdmpyhb_acc_enc;
+defm V6_vrmpyub_acc :
+ T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
+ V6_vrmpyub_acc_enc;
+defm V6_vrmpybus_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">,
+ V6_vrmpybus_acc_enc;
+defm V6_vdmpybus_acc :
+ T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
+ V6_vdmpybus_acc_enc;
+defm V6_vdmpybus_dv_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
+ V6_vdmpybus_dv_acc_enc;
+defm V6_vdmpyhsuisat_acc :
+ T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">,
+ V6_vdmpyhsuisat_acc_enc;
+defm V6_vdmpyhisat_acc :
+ T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhisat_acc_enc;
+defm V6_vdmpyhb_dv_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">,
+ V6_vdmpyhb_dv_acc_enc;
+defm V6_vmpybus_acc :
+ T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">,
+ V6_vmpybus_acc_enc;
+defm V6_vmpabus_acc :
+ T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">,
+ V6_vmpabus_acc_enc;
+defm V6_vmpahb_acc :
+ T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">,
+ V6_vmpahb_acc_enc;
+defm V6_vmpyhsat_acc :
+ T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">,
+ V6_vmpyhsat_acc_enc;
+defm V6_vmpyuh_acc :
+ T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
+ V6_vmpyuh_acc_enc;
+defm V6_vmpyiwb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">,
+ V6_vmpyiwb_acc_enc;
+defm V6_vdsaduh_acc :
+ T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">,
+ V6_vdsaduh_acc_enc;
+defm V6_vmpyihb_acc :
+ T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">,
+ V6_vmpyihb_acc_enc;
+defm V6_vmpyub_acc :
+ T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
+ V6_vmpyub_acc_enc;
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vdmpyhsusat_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">,
+ V6_vdmpyhsusat_acc_enc;
+defm V6_vdmpyhsat_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhsat_acc_enc;
+defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR
+ <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vaslw_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc;
+defm V6_vasrw_acc :
+ T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc;
+}
+
+defm V6_vdmpyhvsat_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
+ V6_vdmpyhvsat_acc_enc;
+defm V6_vmpybusv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">,
+ V6_vmpybusv_acc_enc;
+defm V6_vmpybv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc;
+defm V6_vmpyhus_acc :
+ T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc;
+defm V6_vmpyhv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc;
+defm V6_vmpyiewh_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">,
+ V6_vmpyiewh_acc_enc;
+defm V6_vmpyiewuh_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">,
+ V6_vmpyiewuh_acc_enc;
+defm V6_vmpyih_acc :
+ T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc;
+defm V6_vmpyowh_rnd_sacc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">,
+ V6_vmpyowh_rnd_sacc_enc;
+defm V6_vmpyowh_sacc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">,
+ V6_vmpyowh_sacc_enc;
+defm V6_vmpyubv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
+ V6_vmpyubv_acc_enc;
+defm V6_vmpyuhv_acc :
+ T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
+ V6_vmpyuhv_acc_enc;
+defm V6_vrmpybusv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">,
+ V6_vrmpybusv_acc_enc;
+defm V6_vrmpybv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc;
+defm V6_vrmpyubv_acc :
+ T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
+ V6_vrmpyubv_acc_enc;
+
+
+class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = CVI_VA;
+ let Type = TypeCVI_VA;
+}
+
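+// Each vector compare accumulates into an existing predicate: "$dst = $_src_"
+// ties the result to the input predicate, and the &=, |= and ^= forms below
+// combine it with the new compare result.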
+multiclass T_HVX_vcmp <string asmString> {
+ def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>;
+}
+
+defm V6_veqb_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc;
+defm V6_veqh_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc;
+defm V6_veqw_and :
+ T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc;
+defm V6_vgtb_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc;
+defm V6_vgth_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc;
+defm V6_vgtw_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc;
+defm V6_vgtub_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc;
+defm V6_vgtuh_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc;
+defm V6_vgtuw_and :
+ T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc;
+defm V6_veqb_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc;
+defm V6_veqh_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc;
+defm V6_veqw_or :
+ T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc;
+defm V6_vgtb_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc;
+defm V6_vgth_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc;
+defm V6_vgtw_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc;
+defm V6_vgtub_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc;
+defm V6_vgtuh_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc;
+defm V6_vgtuw_or :
+ T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc;
+defm V6_veqb_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc;
+defm V6_veqh_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc;
+defm V6_veqw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc;
+defm V6_vgtb_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc;
+defm V6_vgth_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc;
+defm V6_vgtw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc;
+defm V6_vgtub_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc;
+defm V6_vgtuh_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc;
+defm V6_vgtuw_xor :
+ T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc;
+
+defm V6_vminub :
+ T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc;
+defm V6_vminuh :
+ T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc;
+defm V6_vminh :
+ T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc;
+defm V6_vminw :
+ T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc;
+defm V6_vmaxub :
+ T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc;
+defm V6_vmaxuh :
+ T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc;
+defm V6_vmaxh :
+ T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc;
+defm V6_vmaxw :
+ T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc;
+defm V6_vshuffeb :
+ T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc;
+defm V6_vshuffob :
+ T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc;
+defm V6_vshufeh :
+ T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc;
+defm V6_vshufoh :
+ T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc;
+
+let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
+defm V6_vmpyowh_rnd :
+ T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">,
+ V6_vmpyowh_rnd_enc;
+defm V6_vmpyiewuh :
+ T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc;
+defm V6_vmpyewuh :
+ T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc;
+defm V6_vmpyowh :
+ T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc;
+defm V6_vmpyiowh :
+ T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc;
+}
+let Itinerary = CVI_VX, Type = TypeCVI_VX in
+defm V6_vmpyieoh :
+ T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in {
+defm V6_vshufoeh :
+ T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc;
+defm V6_vshufoeb :
+ T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc;
+}
+
+let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
+defm V6_vcombine :
+ T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc;
+
+def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
+ SDTCisSubVecOfVec<1, 0>]>;
+
+def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
+
+def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
+ (v16i32 VectorRegs:$Vt))),
+ (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
+ (v32i32 VecDblRegs:$Vt))),
+ (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+
+let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in {
+defm V6_vsathub :
+ T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc;
+defm V6_vsatwh :
+ T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vroundwh :
+ T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc;
+defm V6_vroundwuh :
+ T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc;
+defm V6_vroundhb :
+ T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc;
+defm V6_vroundhub :
+ T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc;
+defm V6_vasrwv :
+ T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc;
+defm V6_vlsrwv :
+ T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc;
+defm V6_vlsrhv :
+ T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc;
+defm V6_vasrhv :
+ T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc;
+defm V6_vaslwv :
+ T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc;
+defm V6_vaslhv :
+ T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc;
+}
+
+defm V6_vaddb :
+ T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc;
+defm V6_vaddh :
+ T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in {
+defm V6_vdelta :
+ T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc;
+defm V6_vrdelta :
+ T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc;
+defm V6_vdealb4w :
+ T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc;
+defm V6_vpackeb :
+ T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc;
+defm V6_vpackeh :
+ T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc;
+defm V6_vpackhub_sat :
+ T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc;
+defm V6_vpackhb_sat :
+ T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc;
+defm V6_vpackwuh_sat :
+ T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc;
+defm V6_vpackwh_sat :
+ T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc;
+defm V6_vpackob :
+ T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc;
+defm V6_vpackoh :
+ T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc;
+}
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2>
+ : CVI_VA_Resource1 <(outs RC2:$dst),
+ (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString,
+ [], "$dst = $_src_" > {
+ let Itinerary = CVI_VA;
+ let Type = TypeCVI_VA;
+}
+
+multiclass T_HVX_condALU <string asmString> {
+ def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>;
+}
+
+defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">,
+ V6_vaddbq_enc;
+defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">,
+ V6_vaddhq_enc;
+defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">,
+ V6_vaddwq_enc;
+defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">,
+ V6_vsubbq_enc;
+defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">,
+ V6_vsubhq_enc;
+defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">,
+ V6_vsubwq_enc;
+defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">,
+ V6_vaddbnq_enc;
+defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">,
+ V6_vaddhnq_enc;
+defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">,
+ V6_vaddwnq_enc;
+defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">,
+ V6_vsubbnq_enc;
+defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">,
+ V6_vsubhnq_enc;
+defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">,
+ V6_vsubwnq_enc;
+
+let hasNewValue = 1 in
+class T_HVX_alu_2op <string asmString, InstrItinClass itin,
+ RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1),
+ asmString >{
+ let Itinerary = itin;
+ let Type = !cast<IType>("Type"#itin);
+}
+
+multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout,
+ RegisterClass RCin, InstrItinClass itin> {
+ def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu_2op <asmString, itin,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+let hasNewValue = 1 in
+multiclass T_HVX_alu_2op_VV <string asmString>:
+ T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>;
+
+multiclass T_HVX_alu_2op_WV <string asmString>:
+ T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>;
+
+
+defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">,
+ V6_vabsh_enc;
+defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">,
+ V6_vabsw_enc;
+defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">,
+ V6_vabsh_sat_enc;
+defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">,
+ V6_vabsw_sat_enc;
+defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">,
+ V6_vnot_enc;
+defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">,
+ V6_vassign_enc;
+
+defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">,
+ V6_vzb_enc;
+defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">,
+ V6_vzh_enc;
+defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">,
+ V6_vsb_enc;
+defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">,
+ V6_vsh_enc;
+
+let Itinerary = CVI_VP, Type = TypeCVI_VP in {
+defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">,
+ V6_vdealh_enc;
+defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">,
+ V6_vdealb_enc;
+defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">,
+ V6_vshuffh_enc;
+defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">,
+ V6_vshuffb_enc;
+}
+
+let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in {
+defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">,
+ V6_vunpackub_enc;
+defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">,
+ V6_vunpackuh_enc;
+defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">,
+ V6_vunpackb_enc;
+defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">,
+ V6_vunpackh_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">,
+ V6_vcl0w_enc;
+defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">,
+ V6_vcl0h_enc;
+defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">,
+ V6_vnormamtw_enc;
+defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">,
+ V6_vnormamth_enc;
+defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">,
+ V6_vpopcounth_enc;
+}
+
+let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG,
+ Type = TypeCVI_VX_DV in
+class T_HVX_vmpyacc2 <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$dst),
+ (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ asmString, [], "$dst = $_src_" > ;
+
+
+multiclass T_HVX_vmpyacc2 <string asmString> {
+ def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>;
+}
+
+defm V6_vrmpybusi_acc :
+ T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">,
+ V6_vrmpybusi_acc_enc;
+defm V6_vrsadubi_acc :
+ T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">,
+ V6_vrsadubi_acc_enc;
+defm V6_vrmpyubi_acc :
+ T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">,
+ V6_vrmpyubi_acc_enc;
+
+
+let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in
+class T_HVX_vmpy2 <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ asmString>;
+
+
+multiclass T_HVX_vmpy2 <string asmString> {
+ def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>;
+}
+
+defm V6_vrmpybusi :
+ T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc;
+defm V6_vrsadubi :
+ T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc;
+defm V6_vrmpyubi :
+ T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc;
+
+
+let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS,
+ hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in
+class T_HVX_perm <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_),
+ (ins RC:$src1, RC:$src2, IntRegs:$src3),
+ asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >;
+
+multiclass T_HVX_perm <string asmString> {
+ def NAME : T_HVX_perm <asmString, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>;
+}
+
+let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in {
+ defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc;
+ defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc;
+}
+
+// Conditional vector move.
+let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_HVX_cmov <bit isPredNot, RegisterClass RC>
+ : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2),
+ "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+multiclass T_HVX_cmov <bit isPredNot = 0> {
+ def NAME : T_HVX_cmov <isPredNot, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>;
+}
+
+defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc;
+defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc;
+
+// Conditional vector combine.
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1,
+ hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
+class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 < (outs RCout:$dst),
+ (ins PredRegs:$src1, RCin:$src2, RCin:$src3),
+ "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> {
+ let isPredicatedFalse = isPredNot;
+}
+
+multiclass T_HVX_ccombine <bit isPredNot = 0> {
+ def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>;
+}
+
+defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc;
+defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc;
+
+let hasNewValue = 1 in
+class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst),
+ (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
+ asmString >;
+
+multiclass T_HVX_shift <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_shift <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_shift_VV <string asmString>:
+ T_HVX_shift <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_shift_WV <string asmString>:
+ T_HVX_shift <asmString, VecDblRegs, VectorRegs>;
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in {
+defm V6_valignb :
+ T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc;
+defm V6_vlalignb :
+ T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc;
+}
+
+let Itinerary = CVI_VS, Type = TypeCVI_VS in {
+defm V6_vasrwh :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc;
+defm V6_vasrwhsat :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">,
+ V6_vasrwhsat_enc;
+defm V6_vasrwhrndsat :
+ T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">,
+ V6_vasrwhrndsat_enc;
+defm V6_vasrwuhsat :
+ T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">,
+ V6_vasrwuhsat_enc;
+defm V6_vasrhubsat :
+ T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">,
+ V6_vasrhubsat_enc;
+defm V6_vasrhubrndsat :
+ T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">,
+ V6_vasrhubrndsat_enc;
+defm V6_vasrhbrndsat :
+ T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">,
+ V6_vasrhbrndsat_enc;
+}
+
+// Assembler mapped -- should this be an alias?
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc;
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in {
+defm V6_vshuffvdd :
+ T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc;
+defm V6_vdealvdd :
+ T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc;
+}
+
+let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in
+class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1),
+ asmString, [], "$dst = $_src_">;
+
+multiclass T_HVX_unpack <string asmString> {
+ def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>;
+}
+
+defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc;
+defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc;
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1,
+ hasSideEffects = 0 in
+class T_HVX_valign <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3),
+ asmString>;
+
+multiclass T_HVX_valign <string asmString> {
+ def NAME : T_HVX_valign <asmString, VectorRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>;
+}
+
+defm V6_valignbi :
+ T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc;
+defm V6_vlalignbi :
+ T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
+class T_HVX_predAlu <string asmString, RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asmString>;
+
+multiclass T_HVX_predAlu <string asmString> {
+ def NAME : T_HVX_predAlu <asmString, VecPredRegs>;
+
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>;
+}
+
+defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc;
+defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc;
+defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc;
+defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc;
+defm V6_pred_and_n :
+ T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA in
+class T_HVX_prednot <RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1),
+ "$dst = not($src1)">, V6_pred_not_enc;
+
+def V6_pred_not : T_HVX_prednot <VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA in
+class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
+ asmString >;
+
+multiclass T_HVX_vcmp2 <string asmString> {
+ def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>;
+}
+
+defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc;
+defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc;
+defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc;
+defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc;
+defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc;
+defm V6_vgtw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc;
+defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc;
+defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc;
+defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc;
+
+let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
+ "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc;
+
+def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>;
+
+let isAccumulator = 1 in
+class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
+ "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc;
+
+def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>;
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst),
+ (ins RCin:$src1, IntRegs:$src2),
+ "$dst = vand($src1,$src2)" >, V6_vandqrt_enc;
+
+def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>;
+
+let hasNewValue = 1, hasSideEffects = 0 in
+class T_V6_lvsplatw <RegisterClass RC>
+ : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1),
+ "$dst = vsplat($src1)" >, V6_lvsplatw_enc;
+
+def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>;
+
+
+let hasNewValue = 1 in
+class T_V6_vinsertwr <RegisterClass RC>
+ : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1),
+ "$dst.w = vinsert($src1)", [], "$dst = $_src_">,
+ V6_vinsertwr_enc;
+
+def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>;
+
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in
+class T_V6_pred_scalar2 <RegisterClass RC>
+ : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1),
+ "$dst = vsetq($src1)">, V6_pred_scalar2_enc;
+
+def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>;
+let isCodeGenOnly = 1 in
+def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>;
+
+class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin>
+ : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
+ "$dst = vand($src1,$src2)">, V6_vandvrt_enc;
+
+def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>;
+
+let validSubTargets = HasV60SubT in
+class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp >
+ : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>;
+
+class T_HVX_rol_R <string asmString>
+ : T_HVX_rol <asmString, IntRegs, u5Imm>;
+class T_HVX_rol_P <string asmString>
+ : T_HVX_rol <asmString, DoubleRegs, u6Imm>;
+
+def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc;
+let hasNewValue = 1, opNewValue = 0 in
+def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc;
+
+let validSubTargets = HasV60SubT in
+class T_HVX_rol_acc <string asmString, RegisterClass RC, Operand ImmOp>
+ : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2),
+ asmString, [], "$dst = $_src_" >;
+
+class T_HVX_rol_acc_P <string asmString>
+ : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>;
+
+class T_HVX_rol_acc_R <string asmString>
+ : T_HVX_rol_acc <asmString, IntRegs, u5Imm>;
+
+def S6_rol_i_p_nac :
+ T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc;
+def S6_rol_i_p_acc :
+ T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc;
+def S6_rol_i_p_and :
+ T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc;
+def S6_rol_i_p_or :
+ T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc;
+def S6_rol_i_p_xacc :
+ T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc;
+
+let hasNewValue = 1, opNewValue = 0 in {
+def S6_rol_i_r_nac :
+ T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc;
+def S6_rol_i_r_acc :
+ T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc;
+def S6_rol_i_r_and :
+ T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc;
+def S6_rol_i_r_or :
+ T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc;
+def S6_rol_i_r_xacc :
+ T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc;
+}
+
+let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in
+class T_V6_extractw <RegisterClass RC>
+ : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2),
+ "$dst = vextract($src1,$src2)">, V6_extractw_enc;
+
+def V6_extractw : T_V6_extractw <VectorRegs>;
+let isCodeGenOnly = 1 in
+def V6_extractw_128B : T_V6_extractw <VectorRegs128B>;
+
+let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in
+class T_sys0op <string asmString>
+ : ST1Inst <(outs), (ins), asmString>;
+
+let isSolo = 1, validSubTargets = HasV55SubT in {
+def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc;
+def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc;
+def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc;
+}
+
+class T_sys1op <string asmString, RegisterClass RC>
+ : ST1Inst <(outs), (ins RC:$src1), asmString>;
+
+class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>;
+class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>;
+
+let isSoloAX = 1, validSubTargets = HasV55SubT in
+def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc;
+
+let isSolo = 1, validSubTargets = HasV60SubT in {
+def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc;
+def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc;
+}
+
+let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1,
+ validSubTargets = HasV55SubT in
+def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = l2locka($src1)">, Y5_l2locka_enc;
+
+// Not defined on the etc side; reason unclear.
+// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc;
+
+let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1,
+ hasSideEffects = 0,
+validSubTargets = HasV55SubT in
+def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2),
+ (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst1,$dst2 = vacsh($src1,$src2)", [],
+ "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc;
+
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1,
+ hasSideEffects = 0 in
+class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1,
+ RegisterClass RCin2>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>;
+
+multiclass T_HVX_alu2 <string asmString, RegisterClass RC > {
+ def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"),
+ VecPredRegs128B, VectorRegs128B>;
+}
+
+multiclass T_HVX_alu2_V <string asmString> :
+ T_HVX_alu2 <asmString, VectorRegs>;
+
+multiclass T_HVX_alu2_W <string asmString> :
+ T_HVX_alu2 <asmString, VecDblRegs>;
+
+defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc;
+
+let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1,
+ hasSideEffects = 0 in
+defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc;
+
+class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>;
+
+multiclass T_HVX_vlutb <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_vlutb <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_vlutb_V <string asmString> :
+ T_HVX_vlutb <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_vlutb_W <string asmString> :
+ T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>;
+
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in
+class T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
+ RegisterClass RCin>
+ : CVI_VA_Resource1<(outs RCout:$dst),
+ (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
+ asmString, [], "$dst = $_src_">;
+
+multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
+ RegisterClass RCin> {
+ def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>;
+ let isCodeGenOnly = 1 in
+ def NAME#_128B : T_HVX_vlutb_acc<asmString,
+ !cast<RegisterClass>(RCout#"128B"),
+ !cast<RegisterClass>(RCin#"128B")>;
+}
+
+multiclass T_HVX_vlutb_acc_V <string asmString> :
+ T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>;
+
+multiclass T_HVX_vlutb_acc_W <string asmString> :
+ T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>;
+
+
+let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in
+defm V6_vlutvvb:
+ T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc;
+
+let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in
+defm V6_vlutvwh:
+ T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc;
+
+let hasNewValue = 1 in {
+ defm V6_vlutvvb_oracc:
+ T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">,
+ V6_vlutvvb_oracc_enc;
+ defm V6_vlutvwh_oracc:
+ T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">,
+ V6_vlutvwh_oracc_enc;
+}
+
+// This is a fake instruction; should it be defined at all?
+def S2_cabacencbin
+ : SInst2<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
+ "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc;
+
+// Vhist instructions
+def V6_vhistq
+ : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1),
+ "vhist($src1)">, V6_vhistq_enc;
+
+def V6_vhist
+ : CVI_HIST_Resource1 <(outs), (ins),
+ "vhist" >, V6_vhist_enc;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
index f4fb946..96dd531 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -35,6 +35,34 @@ multiclass bitconvert_64<ValueType a, ValueType b> {
(a DoubleRegs:$src)>;
}
+multiclass bitconvert_vec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VectorRegs:$src))),
+ (b VectorRegs:$src)>;
+ def : Pat <(a (bitconvert (b VectorRegs:$src))),
+ (a VectorRegs:$src)>;
+}
+
+multiclass bitconvert_dblvec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecDblRegs:$src))),
+ (b VecDblRegs:$src)>;
+ def : Pat <(a (bitconvert (b VecDblRegs:$src))),
+ (a VecDblRegs:$src)>;
+}
+
+multiclass bitconvert_predvec<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecPredRegs:$src))),
+ (b VectorRegs:$src)>;
+ def : Pat <(a (bitconvert (b VectorRegs:$src))),
+ (a VecPredRegs:$src)>;
+}
+
+multiclass bitconvert_dblvec128B<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a VecDblRegs128B:$src))),
+ (b VecDblRegs128B:$src)>;
+ def : Pat <(a (bitconvert (b VecDblRegs128B:$src))),
+ (a VecDblRegs128B:$src)>;
+}
+
// Bit convert vector types.
defm : bitconvert_32<v4i8, i32>;
defm : bitconvert_32<v2i16, i32>;
@@ -47,6 +75,21 @@ defm : bitconvert_64<v8i8, v4i16>;
defm : bitconvert_64<v8i8, v2i32>;
defm : bitconvert_64<v4i16, v2i32>;
+defm : bitconvert_vec<v64i8, v16i32>;
+defm : bitconvert_vec<v8i64 , v16i32>;
+defm : bitconvert_vec<v32i16, v16i32>;
+
+defm : bitconvert_dblvec<v16i64, v128i8>;
+defm : bitconvert_dblvec<v32i32, v128i8>;
+defm : bitconvert_dblvec<v64i16, v128i8>;
+
+defm : bitconvert_dblvec128B<v64i32, v128i16>;
+defm : bitconvert_dblvec128B<v256i8, v128i16>;
+defm : bitconvert_dblvec128B<v32i64, v128i16>;
+
+defm : bitconvert_dblvec128B<v64i32, v256i8>;
+defm : bitconvert_dblvec128B<v32i64, v256i8>;
+defm : bitconvert_dblvec128B<v128i16, v256i8>;
// Vector shift support. Vector shifting in Hexagon is rather different
// from internal representation of LLVM.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 1d0d015..b207aaf 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -691,15 +691,15 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s8ExtPred, s8ImmPred>;
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))),
(i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>;
// Mux
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s8ExtPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s8ExtPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s8ExtPred, s8ImmPred>;
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
// Shift halfword
def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
@@ -720,17 +720,17 @@ def : T_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
def : T_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
def : T_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s10ExtPred>;
-def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s10ExtPred>;
-def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u9ExtPred>;
+def : T_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
+def : T_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
+def : T_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
-def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)),
(i32 (C2_cmpgti (I32:$src1),
- (DEC_CONST_SIGNED s8ExtPred:$src2)))>;
+ (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
-def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)),
+def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)),
(i32 (C2_cmpgtui (I32:$src1),
- (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>;
+ (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
// The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0.
def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)),
@@ -1289,3 +1289,5 @@ def: T_stc_pat<S2_storerf_pci_pseudo, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
+include "HexagonIntrinsicsV60.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
new file mode 100644
index 0000000..24a3e4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -0,0 +1,836 @@
+//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins ),
+ "$dst=#0",
+ [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>;
+
+def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins ),
+ "$dst=#0",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>;
+}
+let isPseudo = 1 in
+def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=vassignp_W($src1)",
+ [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=vassignp_W_128B($src1)",
+ [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B
+ VecDblRegs128B:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=lo_W($src1)",
+ [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst),
+ (ins VecDblRegs:$src1),
+ "$dst=hi_W($src1)",
+ [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=lo_W($src1)",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>;
+
+let isPseudo = 1 in
+def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1),
+ "$dst=hi_W($src1)",
+ [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>;
+
+let AddedComplexity = 100 in {
+def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >,
+ Requires<[UseHVXSgl]>;
+
+def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))),
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >,
+ Requires<[UseHVXSgl]>;
+
+def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
+ subreg_loreg)) >,
+ Requires<[UseHVXDbl]>;
+
+def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))),
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
+ subreg_hireg)) >,
+ Requires<[UseHVXDbl]>;
+}
+
+def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))),
+ (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v64i8 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))),
+ (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))),
+ (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))),
+ (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+
+let AddedComplexity = 140 in {
+def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai IntRegs:$addr, 0,
+ (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1),
+ (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(v512i1 (load (i32 IntRegs:$addr))),
+ (v512i1 (V6_vandvrt
+ (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXSgl]>;
+
+def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)),
+ (V6_vS32b_ai_128B IntRegs:$addr, 0,
+ (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1),
+ (A2_tfrsi 0x01010101))))>,
+ Requires<[UseHVXDbl]>;
+
+def : Pat <(v1024i1 (load (i32 IntRegs:$addr))),
+ (v1024i1 (V6_vandvrt_128B
+ (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)),
+ (A2_tfrsi 0x01010101)))>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_R_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1),
+ (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1),
+ (MI VectorRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1),
+ (MI VecPredRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2),
+ (MI VecDblRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2),
+ (MI VectorRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2),
+ (MI VecDblRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WW_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2),
+ (MI VectorRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2),
+ (MI VecPredRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QQ_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2),
+ (MI VecPredRegs:$src1, VecPredRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VecPredRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VecPredRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VWR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_QVV_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VQR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VecPredRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VecPredRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+
+multiclass T_QVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ VectorRegs128B:$src2,
+ IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WRI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3),
+ (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ IntRegs:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ IntRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWRI_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3, imm:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VecDblRegs128B:$src2,
+ IntRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1,
+ VectorRegs128B:$src2,
+ VectorRegs128B:$src3,
+ IntRegs:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>;
+defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>;
+defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>;
+defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>;
+defm : T_VR_pat <V6_vrmpybus, int_hexagon_V6_vrmpybus>;
+defm : T_WR_pat <V6_vdsaduh, int_hexagon_V6_vdsaduh>;
+defm : T_VR_pat <V6_vdmpybus, int_hexagon_V6_vdmpybus>;
+defm : T_WR_pat <V6_vdmpybus_dv, int_hexagon_V6_vdmpybus_dv>;
+defm : T_VR_pat <V6_vdmpyhsusat, int_hexagon_V6_vdmpyhsusat>;
+defm : T_WR_pat <V6_vdmpyhsuisat, int_hexagon_V6_vdmpyhsuisat>;
+defm : T_VR_pat <V6_vdmpyhsat, int_hexagon_V6_vdmpyhsat>;
+defm : T_WR_pat <V6_vdmpyhisat, int_hexagon_V6_vdmpyhisat>;
+defm : T_WR_pat <V6_vdmpyhb_dv, int_hexagon_V6_vdmpyhb_dv>;
+defm : T_VR_pat <V6_vmpybus, int_hexagon_V6_vmpybus>;
+defm : T_WR_pat <V6_vmpabus, int_hexagon_V6_vmpabus>;
+defm : T_WR_pat <V6_vmpahb, int_hexagon_V6_vmpahb>;
+defm : T_VR_pat <V6_vmpyh, int_hexagon_V6_vmpyh>;
+defm : T_VR_pat <V6_vmpyhss, int_hexagon_V6_vmpyhss>;
+defm : T_VR_pat <V6_vmpyhsrs, int_hexagon_V6_vmpyhsrs>;
+defm : T_VR_pat <V6_vmpyuh, int_hexagon_V6_vmpyuh>;
+defm : T_VR_pat <V6_vmpyihb, int_hexagon_V6_vmpyihb>;
+defm : T_VR_pat <V6_vror, int_hexagon_V6_vror>;
+defm : T_VR_pat <V6_vasrw, int_hexagon_V6_vasrw>;
+defm : T_VR_pat <V6_vasrh, int_hexagon_V6_vasrh>;
+defm : T_VR_pat <V6_vaslw, int_hexagon_V6_vaslw>;
+defm : T_VR_pat <V6_vaslh, int_hexagon_V6_vaslh>;
+defm : T_VR_pat <V6_vlsrw, int_hexagon_V6_vlsrw>;
+defm : T_VR_pat <V6_vlsrh, int_hexagon_V6_vlsrh>;
+defm : T_VR_pat <V6_vmpyiwh, int_hexagon_V6_vmpyiwh>;
+defm : T_VR_pat <V6_vmpyiwb, int_hexagon_V6_vmpyiwb>;
+defm : T_WR_pat <V6_vtmpyhb, int_hexagon_V6_vtmpyhb>;
+defm : T_VR_pat <V6_vmpyub, int_hexagon_V6_vmpyub>;
+
+defm : T_VV_pat <V6_vrmpyubv, int_hexagon_V6_vrmpyubv>;
+defm : T_VV_pat <V6_vrmpybv, int_hexagon_V6_vrmpybv>;
+defm : T_VV_pat <V6_vrmpybusv, int_hexagon_V6_vrmpybusv>;
+defm : T_VV_pat <V6_vdmpyhvsat, int_hexagon_V6_vdmpyhvsat>;
+defm : T_VV_pat <V6_vmpybv, int_hexagon_V6_vmpybv>;
+defm : T_VV_pat <V6_vmpyubv, int_hexagon_V6_vmpyubv>;
+defm : T_VV_pat <V6_vmpybusv, int_hexagon_V6_vmpybusv>;
+defm : T_VV_pat <V6_vmpyhv, int_hexagon_V6_vmpyhv>;
+defm : T_VV_pat <V6_vmpyuhv, int_hexagon_V6_vmpyuhv>;
+defm : T_VV_pat <V6_vmpyhvsrs, int_hexagon_V6_vmpyhvsrs>;
+defm : T_VV_pat <V6_vmpyhus, int_hexagon_V6_vmpyhus>;
+defm : T_WW_pat <V6_vmpabusv, int_hexagon_V6_vmpabusv>;
+defm : T_VV_pat <V6_vmpyih, int_hexagon_V6_vmpyih>;
+defm : T_VV_pat <V6_vand, int_hexagon_V6_vand>;
+defm : T_VV_pat <V6_vor, int_hexagon_V6_vor>;
+defm : T_VV_pat <V6_vxor, int_hexagon_V6_vxor>;
+defm : T_VV_pat <V6_vaddw, int_hexagon_V6_vaddw>;
+defm : T_VV_pat <V6_vaddubsat, int_hexagon_V6_vaddubsat>;
+defm : T_VV_pat <V6_vadduhsat, int_hexagon_V6_vadduhsat>;
+defm : T_VV_pat <V6_vaddhsat, int_hexagon_V6_vaddhsat>;
+defm : T_VV_pat <V6_vaddwsat, int_hexagon_V6_vaddwsat>;
+defm : T_VV_pat <V6_vsubb, int_hexagon_V6_vsubb>;
+defm : T_VV_pat <V6_vsubh, int_hexagon_V6_vsubh>;
+defm : T_VV_pat <V6_vsubw, int_hexagon_V6_vsubw>;
+defm : T_VV_pat <V6_vsububsat, int_hexagon_V6_vsububsat>;
+defm : T_VV_pat <V6_vsubuhsat, int_hexagon_V6_vsubuhsat>;
+defm : T_VV_pat <V6_vsubhsat, int_hexagon_V6_vsubhsat>;
+defm : T_VV_pat <V6_vsubwsat, int_hexagon_V6_vsubwsat>;
+defm : T_WW_pat <V6_vaddb_dv, int_hexagon_V6_vaddb_dv>;
+defm : T_WW_pat <V6_vaddh_dv, int_hexagon_V6_vaddh_dv>;
+defm : T_WW_pat <V6_vaddw_dv, int_hexagon_V6_vaddw_dv>;
+defm : T_WW_pat <V6_vaddubsat_dv, int_hexagon_V6_vaddubsat_dv>;
+defm : T_WW_pat <V6_vadduhsat_dv, int_hexagon_V6_vadduhsat_dv>;
+defm : T_WW_pat <V6_vaddhsat_dv, int_hexagon_V6_vaddhsat_dv>;
+defm : T_WW_pat <V6_vaddwsat_dv, int_hexagon_V6_vaddwsat_dv>;
+defm : T_WW_pat <V6_vsubb_dv, int_hexagon_V6_vsubb_dv>;
+defm : T_WW_pat <V6_vsubh_dv, int_hexagon_V6_vsubh_dv>;
+defm : T_WW_pat <V6_vsubw_dv, int_hexagon_V6_vsubw_dv>;
+defm : T_WW_pat <V6_vsububsat_dv, int_hexagon_V6_vsububsat_dv>;
+defm : T_WW_pat <V6_vsubuhsat_dv, int_hexagon_V6_vsubuhsat_dv>;
+defm : T_WW_pat <V6_vsubhsat_dv, int_hexagon_V6_vsubhsat_dv>;
+defm : T_WW_pat <V6_vsubwsat_dv, int_hexagon_V6_vsubwsat_dv>;
+defm : T_VV_pat <V6_vaddubh, int_hexagon_V6_vaddubh>;
+defm : T_VV_pat <V6_vadduhw, int_hexagon_V6_vadduhw>;
+defm : T_VV_pat <V6_vaddhw, int_hexagon_V6_vaddhw>;
+defm : T_VV_pat <V6_vsububh, int_hexagon_V6_vsububh>;
+defm : T_VV_pat <V6_vsubuhw, int_hexagon_V6_vsubuhw>;
+defm : T_VV_pat <V6_vsubhw, int_hexagon_V6_vsubhw>;
+defm : T_VV_pat <V6_vabsdiffub, int_hexagon_V6_vabsdiffub>;
+defm : T_VV_pat <V6_vabsdiffh, int_hexagon_V6_vabsdiffh>;
+defm : T_VV_pat <V6_vabsdiffuh, int_hexagon_V6_vabsdiffuh>;
+defm : T_VV_pat <V6_vabsdiffw, int_hexagon_V6_vabsdiffw>;
+defm : T_VV_pat <V6_vavgub, int_hexagon_V6_vavgub>;
+defm : T_VV_pat <V6_vavguh, int_hexagon_V6_vavguh>;
+defm : T_VV_pat <V6_vavgh, int_hexagon_V6_vavgh>;
+defm : T_VV_pat <V6_vavgw, int_hexagon_V6_vavgw>;
+defm : T_VV_pat <V6_vnavgub, int_hexagon_V6_vnavgub>;
+defm : T_VV_pat <V6_vnavgh, int_hexagon_V6_vnavgh>;
+defm : T_VV_pat <V6_vnavgw, int_hexagon_V6_vnavgw>;
+defm : T_VV_pat <V6_vavgubrnd, int_hexagon_V6_vavgubrnd>;
+defm : T_VV_pat <V6_vavguhrnd, int_hexagon_V6_vavguhrnd>;
+defm : T_VV_pat <V6_vavghrnd, int_hexagon_V6_vavghrnd>;
+defm : T_VV_pat <V6_vavgwrnd, int_hexagon_V6_vavgwrnd>;
+defm : T_WW_pat <V6_vmpabuuv, int_hexagon_V6_vmpabuuv>;
+
+defm : T_VVR_pat <V6_vdmpyhb_acc, int_hexagon_V6_vdmpyhb_acc>;
+defm : T_VVR_pat <V6_vrmpyub_acc, int_hexagon_V6_vrmpyub_acc>;
+defm : T_VVR_pat <V6_vrmpybus_acc, int_hexagon_V6_vrmpybus_acc>;
+defm : T_VVR_pat <V6_vdmpybus_acc, int_hexagon_V6_vdmpybus_acc>;
+defm : T_VVR_pat <V6_vdmpyhsusat_acc, int_hexagon_V6_vdmpyhsusat_acc>;
+defm : T_VVR_pat <V6_vdmpyhsat_acc, int_hexagon_V6_vdmpyhsat_acc>;
+defm : T_VVR_pat <V6_vmpyiwb_acc, int_hexagon_V6_vmpyiwb_acc>;
+defm : T_VVR_pat <V6_vmpyiwh_acc, int_hexagon_V6_vmpyiwh_acc>;
+defm : T_VVR_pat <V6_vmpyihb_acc, int_hexagon_V6_vmpyihb_acc>;
+defm : T_VVR_pat <V6_vaslw_acc, int_hexagon_V6_vaslw_acc>;
+defm : T_VVR_pat <V6_vasrw_acc, int_hexagon_V6_vasrw_acc>;
+
+defm : T_VWR_pat <V6_vdmpyhsuisat_acc, int_hexagon_V6_vdmpyhsuisat_acc>;
+defm : T_VWR_pat <V6_vdmpyhisat_acc, int_hexagon_V6_vdmpyhisat_acc>;
+
+defm : T_WVR_pat <V6_vmpybus_acc, int_hexagon_V6_vmpybus_acc>;
+defm : T_WVR_pat <V6_vmpyhsat_acc, int_hexagon_V6_vmpyhsat_acc>;
+defm : T_WVR_pat <V6_vmpyuh_acc, int_hexagon_V6_vmpyuh_acc>;
+defm : T_WVR_pat <V6_vmpyub_acc, int_hexagon_V6_vmpyub_acc>;
+
+defm : T_WWR_pat <V6_vtmpyb_acc, int_hexagon_V6_vtmpyb_acc>;
+defm : T_WWR_pat <V6_vtmpybus_acc, int_hexagon_V6_vtmpybus_acc>;
+defm : T_WWR_pat <V6_vtmpyhb_acc, int_hexagon_V6_vtmpyhb_acc>;
+defm : T_WWR_pat <V6_vdmpybus_dv_acc, int_hexagon_V6_vdmpybus_dv_acc>;
+defm : T_WWR_pat <V6_vdmpyhb_dv_acc, int_hexagon_V6_vdmpyhb_dv_acc>;
+defm : T_WWR_pat <V6_vmpabus_acc, int_hexagon_V6_vmpabus_acc>;
+defm : T_WWR_pat <V6_vmpahb_acc, int_hexagon_V6_vmpahb_acc>;
+defm : T_WWR_pat <V6_vdsaduh_acc, int_hexagon_V6_vdsaduh_acc>;
+
+defm : T_VVV_pat <V6_vdmpyhvsat_acc, int_hexagon_V6_vdmpyhvsat_acc>;
+defm : T_WVV_pat <V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_acc>;
+defm : T_WVV_pat <V6_vmpybv_acc, int_hexagon_V6_vmpybv_acc>;
+defm : T_WVV_pat <V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_acc>;
+defm : T_WVV_pat <V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_acc>;
+defm : T_VVV_pat <V6_vmpyiewh_acc, int_hexagon_V6_vmpyiewh_acc>;
+defm : T_VVV_pat <V6_vmpyiewuh_acc, int_hexagon_V6_vmpyiewuh_acc>;
+defm : T_VVV_pat <V6_vmpyih_acc, int_hexagon_V6_vmpyih_acc>;
+defm : T_VVV_pat <V6_vmpyowh_rnd_sacc, int_hexagon_V6_vmpyowh_rnd_sacc>;
+defm : T_VVV_pat <V6_vmpyowh_sacc, int_hexagon_V6_vmpyowh_sacc>;
+defm : T_WVV_pat <V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_acc>;
+defm : T_WVV_pat <V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_acc>;
+defm : T_VVV_pat <V6_vrmpybusv_acc, int_hexagon_V6_vrmpybusv_acc>;
+defm : T_VVV_pat <V6_vrmpybv_acc, int_hexagon_V6_vrmpybv_acc>;
+defm : T_VVV_pat <V6_vrmpyubv_acc, int_hexagon_V6_vrmpyubv_acc>;
+
+// Compare instructions
+defm : T_QVV_pat <V6_veqb_and, int_hexagon_V6_veqb_and>;
+defm : T_QVV_pat <V6_veqh_and, int_hexagon_V6_veqh_and>;
+defm : T_QVV_pat <V6_veqw_and, int_hexagon_V6_veqw_and>;
+defm : T_QVV_pat <V6_vgtb_and, int_hexagon_V6_vgtb_and>;
+defm : T_QVV_pat <V6_vgth_and, int_hexagon_V6_vgth_and>;
+defm : T_QVV_pat <V6_vgtw_and, int_hexagon_V6_vgtw_and>;
+defm : T_QVV_pat <V6_vgtub_and, int_hexagon_V6_vgtub_and>;
+defm : T_QVV_pat <V6_vgtuh_and, int_hexagon_V6_vgtuh_and>;
+defm : T_QVV_pat <V6_vgtuw_and, int_hexagon_V6_vgtuw_and>;
+defm : T_QVV_pat <V6_veqb_or, int_hexagon_V6_veqb_or>;
+defm : T_QVV_pat <V6_veqh_or, int_hexagon_V6_veqh_or>;
+defm : T_QVV_pat <V6_veqw_or, int_hexagon_V6_veqw_or>;
+defm : T_QVV_pat <V6_vgtb_or, int_hexagon_V6_vgtb_or>;
+defm : T_QVV_pat <V6_vgth_or, int_hexagon_V6_vgth_or>;
+defm : T_QVV_pat <V6_vgtw_or, int_hexagon_V6_vgtw_or>;
+defm : T_QVV_pat <V6_vgtub_or, int_hexagon_V6_vgtub_or>;
+defm : T_QVV_pat <V6_vgtuh_or, int_hexagon_V6_vgtuh_or>;
+defm : T_QVV_pat <V6_vgtuw_or, int_hexagon_V6_vgtuw_or>;
+defm : T_QVV_pat <V6_veqb_xor, int_hexagon_V6_veqb_xor>;
+defm : T_QVV_pat <V6_veqh_xor, int_hexagon_V6_veqh_xor>;
+defm : T_QVV_pat <V6_veqw_xor, int_hexagon_V6_veqw_xor>;
+defm : T_QVV_pat <V6_vgtb_xor, int_hexagon_V6_vgtb_xor>;
+defm : T_QVV_pat <V6_vgth_xor, int_hexagon_V6_vgth_xor>;
+defm : T_QVV_pat <V6_vgtw_xor, int_hexagon_V6_vgtw_xor>;
+defm : T_QVV_pat <V6_vgtub_xor, int_hexagon_V6_vgtub_xor>;
+defm : T_QVV_pat <V6_vgtuh_xor, int_hexagon_V6_vgtuh_xor>;
+defm : T_QVV_pat <V6_vgtuw_xor, int_hexagon_V6_vgtuw_xor>;
+
+defm : T_VV_pat <V6_vminub, int_hexagon_V6_vminub>;
+defm : T_VV_pat <V6_vminuh, int_hexagon_V6_vminuh>;
+defm : T_VV_pat <V6_vminh, int_hexagon_V6_vminh>;
+defm : T_VV_pat <V6_vminw, int_hexagon_V6_vminw>;
+defm : T_VV_pat <V6_vmaxub, int_hexagon_V6_vmaxub>;
+defm : T_VV_pat <V6_vmaxuh, int_hexagon_V6_vmaxuh>;
+defm : T_VV_pat <V6_vmaxh, int_hexagon_V6_vmaxh>;
+defm : T_VV_pat <V6_vmaxw, int_hexagon_V6_vmaxw>;
+defm : T_VV_pat <V6_vdelta, int_hexagon_V6_vdelta>;
+defm : T_VV_pat <V6_vrdelta, int_hexagon_V6_vrdelta>;
+defm : T_VV_pat <V6_vdealb4w, int_hexagon_V6_vdealb4w>;
+defm : T_VV_pat <V6_vmpyowh_rnd, int_hexagon_V6_vmpyowh_rnd>;
+defm : T_VV_pat <V6_vshuffeb, int_hexagon_V6_vshuffeb>;
+defm : T_VV_pat <V6_vshuffob, int_hexagon_V6_vshuffob>;
+defm : T_VV_pat <V6_vshufeh, int_hexagon_V6_vshufeh>;
+defm : T_VV_pat <V6_vshufoh, int_hexagon_V6_vshufoh>;
+defm : T_VV_pat <V6_vshufoeh, int_hexagon_V6_vshufoeh>;
+defm : T_VV_pat <V6_vshufoeb, int_hexagon_V6_vshufoeb>;
+defm : T_VV_pat <V6_vcombine, int_hexagon_V6_vcombine>;
+defm : T_VV_pat <V6_vmpyieoh, int_hexagon_V6_vmpyieoh>;
+defm : T_VV_pat <V6_vsathub, int_hexagon_V6_vsathub>;
+defm : T_VV_pat <V6_vsatwh, int_hexagon_V6_vsatwh>;
+defm : T_VV_pat <V6_vroundwh, int_hexagon_V6_vroundwh>;
+defm : T_VV_pat <V6_vroundwuh, int_hexagon_V6_vroundwuh>;
+defm : T_VV_pat <V6_vroundhb, int_hexagon_V6_vroundhb>;
+defm : T_VV_pat <V6_vroundhub, int_hexagon_V6_vroundhub>;
+defm : T_VV_pat <V6_vasrwv, int_hexagon_V6_vasrwv>;
+defm : T_VV_pat <V6_vlsrwv, int_hexagon_V6_vlsrwv>;
+defm : T_VV_pat <V6_vlsrhv, int_hexagon_V6_vlsrhv>;
+defm : T_VV_pat <V6_vasrhv, int_hexagon_V6_vasrhv>;
+defm : T_VV_pat <V6_vaslwv, int_hexagon_V6_vaslwv>;
+defm : T_VV_pat <V6_vaslhv, int_hexagon_V6_vaslhv>;
+defm : T_VV_pat <V6_vaddb, int_hexagon_V6_vaddb>;
+defm : T_VV_pat <V6_vaddh, int_hexagon_V6_vaddh>;
+defm : T_VV_pat <V6_vmpyiewuh, int_hexagon_V6_vmpyiewuh>;
+defm : T_VV_pat <V6_vmpyiowh, int_hexagon_V6_vmpyiowh>;
+defm : T_VV_pat <V6_vpackeb, int_hexagon_V6_vpackeb>;
+defm : T_VV_pat <V6_vpackeh, int_hexagon_V6_vpackeh>;
+defm : T_VV_pat <V6_vpackhub_sat, int_hexagon_V6_vpackhub_sat>;
+defm : T_VV_pat <V6_vpackhb_sat, int_hexagon_V6_vpackhb_sat>;
+defm : T_VV_pat <V6_vpackwuh_sat, int_hexagon_V6_vpackwuh_sat>;
+defm : T_VV_pat <V6_vpackwh_sat, int_hexagon_V6_vpackwh_sat>;
+defm : T_VV_pat <V6_vpackob, int_hexagon_V6_vpackob>;
+defm : T_VV_pat <V6_vpackoh, int_hexagon_V6_vpackoh>;
+defm : T_VV_pat <V6_vmpyewuh, int_hexagon_V6_vmpyewuh>;
+defm : T_VV_pat <V6_vmpyowh, int_hexagon_V6_vmpyowh>;
+
+defm : T_QVV_pat <V6_vaddbq, int_hexagon_V6_vaddbq>;
+defm : T_QVV_pat <V6_vaddhq, int_hexagon_V6_vaddhq>;
+defm : T_QVV_pat <V6_vaddwq, int_hexagon_V6_vaddwq>;
+defm : T_QVV_pat <V6_vaddbnq, int_hexagon_V6_vaddbnq>;
+defm : T_QVV_pat <V6_vaddhnq, int_hexagon_V6_vaddhnq>;
+defm : T_QVV_pat <V6_vaddwnq, int_hexagon_V6_vaddwnq>;
+defm : T_QVV_pat <V6_vsubbq, int_hexagon_V6_vsubbq>;
+defm : T_QVV_pat <V6_vsubhq, int_hexagon_V6_vsubhq>;
+defm : T_QVV_pat <V6_vsubwq, int_hexagon_V6_vsubwq>;
+defm : T_QVV_pat <V6_vsubbnq, int_hexagon_V6_vsubbnq>;
+defm : T_QVV_pat <V6_vsubhnq, int_hexagon_V6_vsubhnq>;
+defm : T_QVV_pat <V6_vsubwnq, int_hexagon_V6_vsubwnq>;
+
+defm : T_V_pat <V6_vabsh, int_hexagon_V6_vabsh>;
+defm : T_V_pat <V6_vabsw, int_hexagon_V6_vabsw>;
+defm : T_V_pat <V6_vabsw_sat, int_hexagon_V6_vabsw_sat>;
+defm : T_V_pat <V6_vabsh_sat, int_hexagon_V6_vabsh_sat>;
+defm : T_V_pat <V6_vnot, int_hexagon_V6_vnot>;
+defm : T_V_pat <V6_vassign, int_hexagon_V6_vassign>;
+defm : T_V_pat <V6_vzb, int_hexagon_V6_vzb>;
+defm : T_V_pat <V6_vzh, int_hexagon_V6_vzh>;
+defm : T_V_pat <V6_vsb, int_hexagon_V6_vsb>;
+defm : T_V_pat <V6_vsh, int_hexagon_V6_vsh>;
+defm : T_V_pat <V6_vdealh, int_hexagon_V6_vdealh>;
+defm : T_V_pat <V6_vdealb, int_hexagon_V6_vdealb>;
+defm : T_V_pat <V6_vunpackub, int_hexagon_V6_vunpackub>;
+defm : T_V_pat <V6_vunpackuh, int_hexagon_V6_vunpackuh>;
+defm : T_V_pat <V6_vunpackb, int_hexagon_V6_vunpackb>;
+defm : T_V_pat <V6_vunpackh, int_hexagon_V6_vunpackh>;
+defm : T_V_pat <V6_vshuffh, int_hexagon_V6_vshuffh>;
+defm : T_V_pat <V6_vshuffb, int_hexagon_V6_vshuffb>;
+defm : T_V_pat <V6_vcl0w, int_hexagon_V6_vcl0w>;
+defm : T_V_pat <V6_vpopcounth, int_hexagon_V6_vpopcounth>;
+defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>;
+defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>;
+defm : T_V_pat <V6_vnormamth, int_hexagon_V6_vnormamth>;
+
+defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>;
+defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>;
+defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>;
+
+defm : T_WWRI_pat <V6_vrmpybusi_acc, int_hexagon_V6_vrmpybusi_acc>;
+defm : T_WWRI_pat <V6_vrsadubi_acc, int_hexagon_V6_vrsadubi_acc>;
+defm : T_WWRI_pat <V6_vrmpyubi_acc, int_hexagon_V6_vrmpyubi_acc>;
+
+// Assembler mapped.
+//defm : T_V_pat <V6_vtran2x2, int_hexagon_V6_vtran2x2>;
+// Not present earlier; an intrinsic needs to be added.
+defm : T_VVR_pat <V6_valignb, int_hexagon_V6_valignb>;
+defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignb>;
+defm : T_VVR_pat <V6_vasrwh, int_hexagon_V6_vasrwh>;
+defm : T_VVR_pat <V6_vasrwhsat, int_hexagon_V6_vasrwhsat>;
+defm : T_VVR_pat <V6_vasrwhrndsat, int_hexagon_V6_vasrwhrndsat>;
+defm : T_VVR_pat <V6_vasrwuhsat, int_hexagon_V6_vasrwuhsat>;
+defm : T_VVR_pat <V6_vasrhubsat, int_hexagon_V6_vasrhubsat>;
+defm : T_VVR_pat <V6_vasrhubrndsat, int_hexagon_V6_vasrhubrndsat>;
+defm : T_VVR_pat <V6_vasrhbrndsat, int_hexagon_V6_vasrhbrndsat>;
+
+defm : T_VVR_pat <V6_vshuffvdd, int_hexagon_V6_vshuffvdd>;
+defm : T_VVR_pat <V6_vdealvdd, int_hexagon_V6_vdealvdd>;
+
+defm : T_WV_pat <V6_vunpackob, int_hexagon_V6_vunpackob>;
+defm : T_WV_pat <V6_vunpackoh, int_hexagon_V6_vunpackoh>;
+defm : T_VVI_pat <V6_valignbi, int_hexagon_V6_valignbi>;
+defm : T_VVI_pat <V6_vlalignbi, int_hexagon_V6_vlalignbi>;
+
+defm : T_QVV_pat <V6_vswap, int_hexagon_V6_vswap>;
+defm : T_QVV_pat <V6_vmux, int_hexagon_V6_vmux>;
+defm : T_QQ_pat <V6_pred_and, int_hexagon_V6_pred_and>;
+defm : T_QQ_pat <V6_pred_or, int_hexagon_V6_pred_or>;
+defm : T_Q_pat <V6_pred_not, int_hexagon_V6_pred_not>;
+defm : T_QQ_pat <V6_pred_xor, int_hexagon_V6_pred_xor>;
+defm : T_QQ_pat <V6_pred_or_n, int_hexagon_V6_pred_or_n>;
+defm : T_QQ_pat <V6_pred_and_n, int_hexagon_V6_pred_and_n>;
+defm : T_VV_pat <V6_veqb, int_hexagon_V6_veqb>;
+defm : T_VV_pat <V6_veqh, int_hexagon_V6_veqh>;
+defm : T_VV_pat <V6_veqw, int_hexagon_V6_veqw>;
+defm : T_VV_pat <V6_vgtb, int_hexagon_V6_vgtb>;
+defm : T_VV_pat <V6_vgth, int_hexagon_V6_vgth>;
+defm : T_VV_pat <V6_vgtw, int_hexagon_V6_vgtw>;
+defm : T_VV_pat <V6_vgtub, int_hexagon_V6_vgtub>;
+defm : T_VV_pat <V6_vgtuh, int_hexagon_V6_vgtuh>;
+defm : T_VV_pat <V6_vgtuw, int_hexagon_V6_vgtuw>;
+
+defm : T_VQR_pat <V6_vandqrt_acc, int_hexagon_V6_vandqrt_acc>;
+defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>;
+defm : T_QR_pat <V6_vandqrt, int_hexagon_V6_vandqrt>;
+defm : T_R_pat <V6_lvsplatw, int_hexagon_V6_lvsplatw>;
+defm : T_R_pat <V6_pred_scalar2, int_hexagon_V6_pred_scalar2>;
+defm : T_VR_pat <V6_vandvrt, int_hexagon_V6_vandvrt>;
+
+defm : T_VVR_pat <V6_vlutvvb, int_hexagon_V6_vlutvvb>;
+defm : T_VVR_pat <V6_vlutvwh, int_hexagon_V6_vlutvwh>;
+defm : T_VVVR_pat <V6_vlutvvb_oracc, int_hexagon_V6_vlutvvb_oracc>;
+defm : T_WVVR_pat <V6_vlutvwh_oracc, int_hexagon_V6_vlutvwh_oracc>;
+
+defm : T_QVR_pat <V6_vandvrt_acc, int_hexagon_V6_vandvrt_acc>;
+def : T_PI_pat <S6_rol_i_p, int_hexagon_S6_rol_i_p>;
+def : T_RI_pat <S6_rol_i_r, int_hexagon_S6_rol_i_r>;
+def : T_PPI_pat <S6_rol_i_p_nac, int_hexagon_S6_rol_i_p_nac>;
+def : T_PPI_pat <S6_rol_i_p_acc, int_hexagon_S6_rol_i_p_acc>;
+def : T_PPI_pat <S6_rol_i_p_and, int_hexagon_S6_rol_i_p_and>;
+def : T_PPI_pat <S6_rol_i_p_or, int_hexagon_S6_rol_i_p_or>;
+def : T_PPI_pat <S6_rol_i_p_xacc, int_hexagon_S6_rol_i_p_xacc>;
+def : T_RRI_pat <S6_rol_i_r_nac, int_hexagon_S6_rol_i_r_nac>;
+def : T_RRI_pat <S6_rol_i_r_acc, int_hexagon_S6_rol_i_r_acc>;
+def : T_RRI_pat <S6_rol_i_r_and, int_hexagon_S6_rol_i_r_and>;
+def : T_RRI_pat <S6_rol_i_r_or, int_hexagon_S6_rol_i_r_or>;
+def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>;
+
+defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>;
+defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>;
+
+def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
+
+def: Pat<(v64i16 (trunc v64i32:$Vdd)),
+ (v64i16 (V6_vpackwh_sat_128B
+ (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)),
+ (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>,
+ Requires<[UseHVXDbl]>;
+
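The last pattern above lowers a v64i32-to-v64i16 truncate in 128-byte HVX mode by splitting the double vector register into its high and low halves and feeding them to the saturating word-to-halfword pack. A rough scalar model of that pack follows; the helper names are made up for illustration, and the lane interleaving is simplified, so only the saturation rule is meant to match the instruction.

// Illustrative scalar model only: saturate each 32-bit word to a 16-bit
// halfword, low half of the register pair first, then the high half.
#include <cstdint>
#include <limits>

static int16_t sat_w_to_h(int32_t w) {
  if (w > std::numeric_limits<int16_t>::max()) return std::numeric_limits<int16_t>::max();
  if (w < std::numeric_limits<int16_t>::min()) return std::numeric_limits<int16_t>::min();
  return static_cast<int16_t>(w);
}

static void pack_wh_sat(const int32_t Hi[32], const int32_t Lo[32], int16_t Out[64]) {
  for (int i = 0; i < 32; ++i) {
    Out[i]      = sat_w_to_h(Lo[i]);   // words from the low half of the pair
    Out[i + 32] = sat_w_to_h(Hi[i]);   // words from the high half of the pair
  }
}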
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 75189b6..624c0f6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -26,39 +26,71 @@
using namespace llvm;
-static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
- HexagonAsmPrinter& Printer) {
+namespace llvm {
+ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ HexagonAsmPrinter &Printer) {
MCContext &MC = Printer.OutContext;
const MCExpr *ME;
- ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC);
+ // Populate the relocation type based on Hexagon target flags
+ // set on an operand
+ MCSymbolRefExpr::VariantKind RelocationType;
+ switch (MO.getTargetFlags()) {
+ default:
+ RelocationType = MCSymbolRefExpr::VK_None;
+ break;
+ case HexagonII::MO_PCREL:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL;
+ break;
+ case HexagonII::MO_GOT:
+ RelocationType = MCSymbolRefExpr::VK_GOT;
+ break;
+ case HexagonII::MO_LO16:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16;
+ break;
+ case HexagonII::MO_HI16:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16;
+ break;
+ case HexagonII::MO_GPREL:
+ RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL;
+ break;
+ }
+
+ ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC);
if (!MO.isJTI() && MO.getOffset())
ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC),
MC);
- return (MCOperand::createExpr(ME));
+ return MCOperand::createExpr(ME);
}
// Create an MCInst from a MachineInstr
-void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
- HexagonAsmPrinter& AP) {
- if(MI->getOpcode() == Hexagon::ENDLOOP0){
+void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+ MCInst &MCB, HexagonAsmPrinter &AP) {
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
HexagonMCInstrInfo::setInnerLoop(MCB);
return;
}
- if(MI->getOpcode() == Hexagon::ENDLOOP1){
+ if (MI->getOpcode() == Hexagon::ENDLOOP1) {
HexagonMCInstrInfo::setOuterLoop(MCB);
return;
}
- MCInst* MCI = new (AP.OutContext) MCInst;
+ MCInst *MCI = new (AP.OutContext) MCInst;
MCI->setOpcode(MI->getOpcode());
assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
"MCI opcode should have been set on construction");
+ bool MustExtend = false;
for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCO;
+ if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+ MustExtend = true;
switch (MO.getType()) {
default:
@@ -73,11 +105,14 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
APFloat Val = MO.getFPImm()->getValueAPF();
// FP immediates are used only when setting GPRs, so they may be dealt
// with like regular immediates from this point on.
- MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData());
+ MCO = MCOperand::createExpr(
+ MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(),
+ AP.OutContext));
break;
}
case MachineOperand::MO_Immediate:
- MCO = MCOperand::createImm(MO.getImm());
+ MCO = MCOperand::createExpr(
+ MCConstantExpr::create(MO.getImm(), AP.OutContext));
break;
case MachineOperand::MO_MachineBasicBlock:
MCO = MCOperand::createExpr
@@ -104,5 +139,8 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB,
MCI->addOperand(MCO);
}
+ AP.HexagonProcessInstruction(*MCI, *MI);
+ HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI,
+ MustExtend);
MCB.addOperand(MCOperand::createInst(MCI));
}
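Two things stand out in the HexagonLowerToMC rewrite: relocation variant kinds are now derived from the operand's target flags, and plain immediates are wrapped in MCConstantExpr nodes instead of raw MCOperand immediates, apparently so the later extendIfNeeded step can treat all operands as expressions. A minimal sketch of that wrapping, using only the MC APIs that appear in the hunk above; lowerImm is a made-up name for illustration.

// Minimal sketch: turn an immediate into an expression-backed MCOperand, as
// the lowered operands above now do. Ctx would be the AsmPrinter's OutContext.
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"

static llvm::MCOperand lowerImm(int64_t Imm, llvm::MCContext &Ctx) {
  const llvm::MCExpr *E = llvm::MCConstantExpr::create(Imm, Ctx);
  return llvm::MCOperand::createExpr(E);
}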
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 35f732c..7a52d68 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -179,7 +179,11 @@ void VLIWMachineScheduler::schedule() {
initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
- while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ while (true) {
+ DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ SUnit *SU = SchedImpl->pickNode(IsTopNode);
+ if (!SU) break;
+
if (!checkSchedLimit())
break;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 707bfdb..20c4ab1 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -92,6 +92,7 @@ namespace {
/// \brief A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
+ bool isNewValueJumpCandidate(const MachineInstr *MI) const;
};
} // end of anonymous namespace
@@ -280,9 +281,9 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
return true;
}
-// Given a compare operator, return a matching New Value Jump
-// compare operator. Make sure that MI here is included in
-// HexagonInstrInfo.cpp::isNewValueJumpCandidate
+
+// Given a compare operator, return a matching New Value Jump compare operator.
+// Make sure that MI here is included in isNewValueJumpCandidate.
static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
bool secondRegNewified,
MachineBasicBlock *jmpTarget,
@@ -341,6 +342,24 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t
: Hexagon::J4_cmpgtui_t_jumpnv_nt;
+ case Hexagon::C4_cmpneq:
+ return taken ? Hexagon::J4_cmpeq_f_jumpnv_t
+ : Hexagon::J4_cmpeq_f_jumpnv_nt;
+
+ case Hexagon::C4_cmplte:
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmplt_f_jumpnv_t
+ : Hexagon::J4_cmplt_f_jumpnv_nt;
+ return taken ? Hexagon::J4_cmpgt_f_jumpnv_t
+ : Hexagon::J4_cmpgt_f_jumpnv_nt;
+
+ case Hexagon::C4_cmplteu:
+ if (secondRegNewified)
+ return taken ? Hexagon::J4_cmpltu_f_jumpnv_t
+ : Hexagon::J4_cmpltu_f_jumpnv_nt;
+ return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t
+ : Hexagon::J4_cmpgtu_f_jumpnv_nt;
+
default:
llvm_unreachable("Could not find matching New Value Jump instruction.");
}
@@ -348,6 +367,26 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg,
return 0;
}
+bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI)
+ const {
+ switch (MI->getOpcode()) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmplteu:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
@@ -372,7 +411,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// Loop through all the bb's of the function
for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
DEBUG(dbgs() << "** dumping bb ** "
<< MBB->getNumber() << "\n");
@@ -468,7 +507,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
MI->getOperand(0).getReg() == predReg) {
// Not all compares can be new value compare. Arch Spec: 7.6.1.1
- if (QII->isNewValueJumpCandidate(MI)) {
+ if (isNewValueJumpCandidate(MI)) {
assert((MI->getDesc().isCompare()) &&
"Only compare instruction can be collapsed into New Value Jump");
@@ -591,8 +630,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DebugLoc dl = MI->getDebugLoc();
MachineInstr *NewMI;
- assert((QII->isNewValueJumpCandidate(cmpInstr)) &&
- "This compare is not a New Value Jump candidate.");
+ assert((isNewValueJumpCandidate(cmpInstr)) &&
+ "This compare is not a New Value Jump candidate.");
unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2,
isSecondOpNewified,
jmpTarget, MBPI);
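The new C4_cmpneq, C4_cmplte and C4_cmplteu cases have no dedicated new-value jump compares; they map onto the inverted ("_f_") forms of the eq, gt and gtu jumps, relying on the usual logical identities. A scalar sanity check of those identities (helper name is hypothetical):

// a != b is the false sense of a == b; a <= b is the false sense of a > b,
// for both signed and unsigned operands.
#include <cassert>
#include <cstdint>

static void checkJumpIdentities(int32_t a, int32_t b, uint32_t ua, uint32_t ub) {
  assert((a != b)   == !(a == b));
  assert((a <= b)   == !(a > b));
  assert((ua <= ub) == !(ua > ub));
}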
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
index 2bece8f..fbd29cd 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -1,4 +1,4 @@
-//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===//
+//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,59 +7,114 @@
//
//===----------------------------------------------------------------------===//
+def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; }
+def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; }
+def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; }
+def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; }
+def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; }
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; }
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; }
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; }
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; }
-
+def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; }
+def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; }
+def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; }
+def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; }
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; }
+def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; }
+def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; }
+def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; }
+def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; }
+def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; }
+def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; }
+def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; }
+def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; }
+def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; }
+def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; }
+def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; }
+def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; }
+def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; }
+def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; }
+def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; }
+def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; }
+def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; }
+def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; }
+def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; }
+def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; }
+def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; }
// Immediate operands.
-let PrintMethod = "printImmOperand" in {
- def s32Imm : Operand<i32>;
- def s8Imm : Operand<i32>;
- def s8Imm64 : Operand<i64>;
- def s6Imm : Operand<i32>;
+let OperandType = "OPERAND_IMMEDIATE",
+ DecoderMethod = "unsignedImmDecoder" in {
+ def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand;
+ let DecoderMethod = "s32ImmDecoder"; }
+ def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand;
+ let DecoderMethod = "s6_0ImmDecoder"; }
def s6_3Imm : Operand<i32>;
- def s4Imm : Operand<i32>;
- def s4_0Imm : Operand<i32> { let DecoderMethod = "s4_0ImmDecoder"; }
- def s4_1Imm : Operand<i32> { let DecoderMethod = "s4_1ImmDecoder"; }
- def s4_2Imm : Operand<i32> { let DecoderMethod = "s4_2ImmDecoder"; }
- def s4_3Imm : Operand<i32> { let DecoderMethod = "s4_3ImmDecoder"; }
- def u64Imm : Operand<i64>;
- def u32Imm : Operand<i32>;
- def u26_6Imm : Operand<i32>;
- def u16Imm : Operand<i32>;
- def u16_0Imm : Operand<i32>;
- def u16_1Imm : Operand<i32>;
- def u16_2Imm : Operand<i32>;
- def u16_3Imm : Operand<i32>;
- def u11_3Imm : Operand<i32>;
- def u10Imm : Operand<i32>;
- def u9Imm : Operand<i32>;
- def u8Imm : Operand<i32>;
- def u7Imm : Operand<i32>;
- def u6Imm : Operand<i32>;
- def u6_0Imm : Operand<i32>;
- def u6_1Imm : Operand<i32>;
- def u6_2Imm : Operand<i32>;
- def u6_3Imm : Operand<i32>;
- def u5Imm : Operand<i32>;
+ def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand;
+ let DecoderMethod = "s4_0ImmDecoder"; }
+ def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand;
+ let DecoderMethod = "s4_0ImmDecoder"; }
+ def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand;
+ let DecoderMethod = "s4_1ImmDecoder"; }
+ def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand;
+ let DecoderMethod = "s4_2ImmDecoder"; }
+ def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand;
+ let DecoderMethod = "s4_3ImmDecoder"; }
+ def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; }
+ def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; }
+ def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; }
+ def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; }
+ def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; }
+ def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; }
+ def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; }
+ def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; }
+ def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; }
+ def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; }
+ def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; }
+ def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; }
+ def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; }
+ def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; }
+ def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; }
+ def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; }
+ def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; }
+ def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; }
+ def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; }
+ def u5_0Imm : Operand<i32>;
+ def u5_1Imm : Operand<i32>;
def u5_2Imm : Operand<i32>;
def u5_3Imm : Operand<i32>;
- def u4Imm : Operand<i32>;
+ def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; }
def u4_0Imm : Operand<i32>;
+ def u4_1Imm : Operand<i32>;
def u4_2Imm : Operand<i32>;
- def u3Imm : Operand<i32>;
+ def u4_3Imm : Operand<i32>;
+ def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; }
def u3_0Imm : Operand<i32>;
def u3_1Imm : Operand<i32>;
- def u2Imm : Operand<i32>;
- def u1Imm : Operand<i32>;
- def n8Imm : Operand<i32>;
- def m6Imm : Operand<i32>;
+ def u3_2Imm : Operand<i32>;
+ def u3_3Imm : Operand<i32>;
+ def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; }
+ def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; }
+ def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; }
}
-let PrintMethod = "printNOneImmOperand" in
-def nOneImm : Operand<i32>;
+let OperandType = "OPERAND_IMMEDIATE" in {
+ def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand;
+ let PrintMethod = "prints4_6ImmOperand";
+ let DecoderMethod = "s4_6ImmDecoder";}
+ def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand";
+ let DecoderMethod = "s4_6ImmDecoder";}
+ def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand;
+ let PrintMethod = "prints3_6ImmOperand";
+ let DecoderMethod = "s3_6ImmDecoder";}
+ def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand";
+ let DecoderMethod = "s3_6ImmDecoder";}
+}
//
// Immediate predicates
@@ -81,32 +136,12 @@ def s31_1ImmPred : PatLeaf<(i32 imm), [{
def s30_2ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<31,1>(v);
+ return isShiftedInt<30,2>(v);
}]>;
def s29_3ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<31,1>(v);
-}]>;
-
-def s22_10ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<22,10>(v);
-}]>;
-
-def s8_24ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<8,24>(v);
-}]>;
-
-def s16_16ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<16,16>(v);
-}]>;
-
-def s26_6ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<26,6>(v);
+ return isShiftedInt<29,3>(v);
}]>;
def s16ImmPred : PatLeaf<(i32 imm), [{
@@ -114,16 +149,6 @@ def s16ImmPred : PatLeaf<(i32 imm), [{
return isInt<16>(v);
}]>;
-def s13ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<13>(v);
-}]>;
-
-def s12ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<12>(v);
-}]>;
-
def s11_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<11>(v);
@@ -149,16 +174,6 @@ def s10ImmPred : PatLeaf<(i32 imm), [{
return isInt<10>(v);
}]>;
-def s9ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<9>(v);
-}]>;
-
-def m9ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<9>(v) && (v != -256);
-}]>;
-
def s8ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
@@ -194,7 +209,6 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{
return isShiftedInt<4,3>(v);
}]>;
-
def u64ImmPred : PatLeaf<(i64 imm), [{
// Adding "N ||" to suppress gcc unused warning.
return (N || true);
@@ -230,19 +244,19 @@ def u26_6ImmPred : PatLeaf<(i32 imm), [{
return isShiftedUInt<26,6>(v);
}]>;
-def u16ImmPred : PatLeaf<(i32 imm), [{
+def u16_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
-def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+def u16_1ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<16,8>(v);
+ return isShiftedUInt<16,1>(v);
}]>;
-def u16_0ImmPred : PatLeaf<(i32 imm), [{
+def u16_2ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- return isUInt<16>(v);
+ return isShiftedUInt<16,2>(v);
}]>;
def u11_3ImmPred : PatLeaf<(i32 imm), [{
@@ -250,6 +264,11 @@ def u11_3ImmPred : PatLeaf<(i32 imm), [{
return isShiftedUInt<11,3>(v);
}]>;
+def u10ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<10>(v);
+}]>;
+
def u9ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
@@ -321,6 +340,11 @@ def u1ImmPred : PatLeaf<(i1 imm), [{
return isUInt<1>(v);
}]>;
+def u1ImmPred32 : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
def m5BImmPred : PatLeaf<(i32 imm), [{
// m5BImmPred predicate - True if the (char) number is in range -1 .. -31
// and will fit in a 5 bit field when made positive, for use in memops.
@@ -379,7 +403,7 @@ def Clr5ImmPred : PatLeaf<(i32 imm), [{
}]>;
def SetClr5ImmPred : PatLeaf<(i32 imm), [{
- // SetClr5ImmPred predicate - True if the immediate is in range 0..31.
+ // True if the immediate is in range 0..31.
int32_t v = (int32_t)N->getSExtValue();
return (v >= 0 && v <= 31);
}]>;
@@ -404,14 +428,13 @@ def Clr4ImmPred : PatLeaf<(i32 imm), [{
}]>;
def SetClr4ImmPred : PatLeaf<(i32 imm), [{
- // SetClr4ImmPred predicate - True if the immediate is in the range 0..15.
+ // True if the immediate is in the range 0..15.
int16_t v = (int16_t)N->getSExtValue();
return (v >= 0 && v <= 15);
}]>;
def Set3ImmPred : PatLeaf<(i32 imm), [{
- // Set3ImmPred predicate - True if the number is in the series of values:
- // [ 2^0, 2^1, ... 2^7 ].
+ // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ].
// For use in setbit immediate.
uint8_t v = (int8_t)N->getSExtValue();
// Constrain to 8 bits, and then check for single bit.
@@ -419,9 +442,7 @@ def Set3ImmPred : PatLeaf<(i32 imm), [{
}]>;
def Clr3ImmPred : PatLeaf<(i32 imm), [{
- // Clr3ImmPred predicate - True if the number is in the series of
- // bit negated values:
- // [ 2^0, 2^1, ... 2^7 ].
+  // True if the number is in the series of bit negated values:
+  // [ 2^0, 2^1, ... 2^7 ].
// For use in setbit and clrbit immediate.
uint8_t v = ~ (int8_t)N->getSExtValue();
// Constrain to 8 bits, and then check for single bit.
@@ -429,76 +450,109 @@ def Clr3ImmPred : PatLeaf<(i32 imm), [{
}]>;
def SetClr3ImmPred : PatLeaf<(i32 imm), [{
- // SetClr3ImmPred predicate - True if the immediate is in the range 0..7.
+ // True if the immediate is in the range 0..7.
int8_t v = (int8_t)N->getSExtValue();
return (v >= 0 && v <= 7);
}]>;
// Extendable immediate operands.
-
-let PrintMethod = "printExtOperand" in {
- def f32Ext : Operand<f32>;
- def s16Ext : Operand<i32> { let DecoderMethod = "s16ImmDecoder"; }
- def s12Ext : Operand<i32> { let DecoderMethod = "s12ImmDecoder"; }
- def s11_0Ext : Operand<i32> { let DecoderMethod = "s11_0ImmDecoder"; }
- def s11_1Ext : Operand<i32> { let DecoderMethod = "s11_1ImmDecoder"; }
- def s11_2Ext : Operand<i32> { let DecoderMethod = "s11_2ImmDecoder"; }
- def s11_3Ext : Operand<i32> { let DecoderMethod = "s11_3ImmDecoder"; }
- def s10Ext : Operand<i32> { let DecoderMethod = "s10ImmDecoder"; }
- def s9Ext : Operand<i32> { let DecoderMethod = "s90ImmDecoder"; }
- def s8Ext : Operand<i32> { let DecoderMethod = "s8ImmDecoder"; }
- def s7Ext : Operand<i32>;
- def s6Ext : Operand<i32> { let DecoderMethod = "s6_0ImmDecoder"; }
- def u6Ext : Operand<i32>;
- def u7Ext : Operand<i32>;
- def u8Ext : Operand<i32>;
- def u9Ext : Operand<i32>;
- def u10Ext : Operand<i32>;
- def u6_0Ext : Operand<i32>;
- def u6_1Ext : Operand<i32>;
- def u6_2Ext : Operand<i32>;
- def u6_3Ext : Operand<i32>;
+def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; }
+def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; }
+def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; }
+def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; }
+def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; }
+def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; }
+def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; }
+def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; }
+def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; }
+def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; }
+def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; }
+def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; }
+def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; }
+def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; }
+def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; }
+def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; }
+def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; }
+def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; }
+def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; }
+def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; }
+def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; }
+def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; }
+
+
+
+let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand",
+ DecoderMethod = "unsignedImmDecoder" in {
+ def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; }
+ def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand;
+ let DecoderMethod = "s16ImmDecoder"; }
+ def s12Ext : Operand<i32> { let ParserMatchClass = s12ExtOperand;
+ let DecoderMethod = "s12ImmDecoder"; }
+ def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand;
+ let DecoderMethod = "s11_0ImmDecoder"; }
+ def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand;
+ let DecoderMethod = "s11_1ImmDecoder"; }
+ def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand;
+ let DecoderMethod = "s11_2ImmDecoder"; }
+ def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand;
+ let DecoderMethod = "s11_3ImmDecoder"; }
+ def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand;
+ let DecoderMethod = "s10ImmDecoder"; }
+ def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand;
+ let DecoderMethod = "s90ImmDecoder"; }
+ def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand;
+ let DecoderMethod = "s8ImmDecoder"; }
+ def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; }
+ def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand;
+ let DecoderMethod = "s6_0ImmDecoder"; }
+ def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; }
+ def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; }
+ def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; }
+ def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; }
+ def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; }
+ def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; }
+ def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; }
+ def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; }
+ def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; }
+ def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; }
}
-def s10ExtPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (isInt<10>(v))
- return true;
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
+def s4_7ImmPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in an 11-bit sign extended field and
+ // is 128-byte aligned.
+ return isShiftedInt<4,7>(v);
+ return false;
}]>;
-def s8ExtPred : PatLeaf<(i32 imm), [{
+def s3_7ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- if (isInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit signed field.
- return isConstExtProfitable(Node) && isInt<32>(v);
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in a 10-bit sign extended field and
+ // is 128-byte aligned.
+ return isShiftedInt<3,7>(v);
+ return false;
}]>;
-def u8ExtPred : PatLeaf<(i32 imm), [{
+def s4_6ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<8>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in a 10-bit sign extended field and
+ // is 64-byte aligned.
+ return isShiftedInt<4,6>(v);
+ return false;
}]>;
-def u9ExtPred : PatLeaf<(i32 imm), [{
+def s3_6ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
- if (isUInt<9>(v))
- return true;
-
- // Return true if extending this immediate is profitable and the value
- // can fit in a 32-bit unsigned field.
- return isConstExtProfitable(Node) && isUInt<32>(v);
+ if (HST->hasV60TOps())
+ // Return true if the immediate can fit in a 9-bit sign extended field and
+ // is 64-byte aligned.
+ return isShiftedInt<3,6>(v);
+ return false;
}]>;
@@ -523,21 +577,21 @@ let PrintMethod = "printGlobalOperand" in {
let PrintMethod = "printJumpTable" in
def jumptablebase : Operand<i32>;
-def brtarget : Operand<OtherVT>;
+def brtarget : Operand<OtherVT> {
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
+}
def brtargetExt : Operand<OtherVT> {
- let PrintMethod = "printExtBrtarget";
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
+}
+def calltarget : Operand<i32> {
+ let DecoderMethod = "brtargetDecoder";
+ let PrintMethod = "printBrtarget";
}
-def calltarget : Operand<i32>;
def bblabel : Operand<i32>;
-def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">;
-
-def symbolHi32 : Operand<i32> {
- let PrintMethod = "printSymbolHi";
-}
-def symbolLo32 : Operand<i32> {
- let PrintMethod = "printSymbolLo";
-}
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
 // Return true for a 32 to 64-bit sign-extended load.
def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{
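The corrected s30_2ImmPred/s29_3ImmPred bodies and the new V60 predicates (s4_6, s4_7, s3_6, s3_7) all reduce to llvm::isShiftedInt<N, S>, which accepts a value only if it is a multiple of 1<<S and the scaled value fits in a signed N-bit field. A few spot checks of that behaviour, assuming the MathExtras semantics of this revision; the wrapper function name is made up.

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void shiftedImmChecks() {
  assert(llvm::isShiftedInt<30, 2>(4));        // multiple of 4, scaled value in range
  assert(!llvm::isShiftedInt<30, 2>(6));       // not a multiple of 4
  assert(llvm::isShiftedInt<29, 3>(-8));       // multiple of 8, scaled value in range
  assert(llvm::isShiftedInt<4, 7>(128));       // 128-byte aligned, 128 >> 7 == 1 fits in 4 signed bits
  assert(!llvm::isShiftedInt<4, 7>(1 << 11));  // 2048 >> 7 == 16 does not fit in 4 signed bits
}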
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
new file mode 100644
index 0000000..1723771
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -0,0 +1,150 @@
+//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign extended by the caller per Hexagon's ABI.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include "Hexagon.h"
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonOptimizeSZextends();
+ void initializeHexagonOptimizeSZextendsPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonOptimizeSZextends : public FunctionPass {
+ public:
+ static char ID;
+ HexagonOptimizeSZextends() : FunctionPass(ID) {
+ initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+ const char *getPassName() const override {
+ return "Remove sign extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addPreserved<StackProtector>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool intrinsicAlreadySextended(Intrinsic::ID IntID);
+ };
+}
+
+char HexagonOptimizeSZextends::ID = 0;
+
+INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs",
+ "Remove Sign and Zero Extends for Args", false, false)
+
+bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) {
+ switch(IntID) {
+ case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+bool HexagonOptimizeSZextends::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+ // Try to optimize sign extends in formal parameters. This relies on the
+ // caller already sign extending the values; it is not entirely clear
+ // whether our ABI actually requires the caller to do so.
+ for (auto &Arg : F.args()) {
+ if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
+ if (!isa<PointerType>(Arg.getType())) {
+ for (auto UI = Arg.use_begin(); UI != Arg.use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(&Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = &F.getEntryBlock().front();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ ++Idx;
+ }
+
+ // Try to remove redundant sext operations on Hexagon. The hardware
+ // already sign extends many 16 bit intrinsic operations to 32 bits.
+ // For example:
+ // %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y)
+ // %sext233 = shl i32 %34, 16
+ // %conv52 = ashr exact i32 %sext233, 16
+ for (auto &B : F) {
+ for (auto &I : B) {
+ // Look for arithmetic shift right by 16.
+ BinaryOperator *Ashr = dyn_cast<BinaryOperator>(&I);
+ if (!(Ashr && Ashr->getOpcode() == Instruction::AShr))
+ continue;
+ Value *AshrOp1 = Ashr->getOperand(1);
+ ConstantInt *C = dyn_cast<ConstantInt>(AshrOp1);
+ // Right shifted by 16.
+ if (!(C && C->getSExtValue() == 16))
+ continue;
+
+ // The first operand of Ashr comes from logical shift left.
+ Instruction *Shl = dyn_cast<Instruction>(Ashr->getOperand(0));
+ if (!(Shl && Shl->getOpcode() == Instruction::Shl))
+ continue;
+ Value *Intr = Shl->getOperand(0);
+ Value *ShlOp1 = Shl->getOperand(1);
+ C = dyn_cast<ConstantInt>(ShlOp1);
+ // Left shifted by 16.
+ if (!(C && C->getSExtValue() == 16))
+ continue;
+
+ // The first operand of Shl comes from an intrinsic.
+ if (IntrinsicInst *I = dyn_cast<IntrinsicInst>(Intr)) {
+ if (!intrinsicAlreadySextended(I->getIntrinsicID()))
+ continue;
+ // All is well. Replace all uses of AShr with I.
+ for (auto UI = Ashr->user_begin(), UE = Ashr->user_end();
+ UI != UE; ++UI) {
+ const Use &TheUse = UI.getUse();
+ if (Instruction *J = dyn_cast<Instruction>(TheUse.getUser())) {
+ J->replaceUsesOfWith(Ashr, I);
+ }
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+
+FunctionPass *llvm::createHexagonOptimizeSZextends() {
+ return new HexagonOptimizeSZextends();
+}
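The second half of the new pass targets the shl-16/ashr-16 pair, the standard idiom for sign-extending the low 16 bits of a 32-bit value; when the producing intrinsic (such as A2_addh_l16_sat_ll) already returns a sign-extended 16-bit result, the pair is a no-op and every use of the ashr can be rewired to the intrinsic call. A scalar illustration, with hypothetical helper names and assuming the conventional arithmetic right shift of negative values:

// The shift pair the pass removes is equivalent to this sign-extension helper;
// it changes nothing when its input is already a sign-extended 16-bit value.
#include <cassert>
#include <cstdint>

static int32_t sext16(int32_t x) {
  return static_cast<int32_t>(static_cast<uint32_t>(x) << 16) >> 16;
}

static void sextCheck() {
  assert(sext16(-42) == -42);             // already sign-extended: the pair is a no-op
  assert(sext16(0x00018000) == -32768);   // otherwise it really does sign-extend
}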
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index 93dcbe2..e68ff85 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -124,7 +124,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
PeepholeMap.clear();
PeepholeDoubleRegsMap.clear();
@@ -180,7 +180,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
unsigned DstReg = Dst.getReg();
unsigned SrcReg = Src1.getReg();
PeepholeDoubleRegsMap[DstReg] =
- std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/);
+ std::make_pair(*&SrcReg, Hexagon::subreg_hireg);
}
// Look for P=NOT(P).
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index f6bb4a0..61c0589 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -66,6 +66,8 @@ HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const {
switch (HST.getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
+ case HexagonSubtarget::V55:
+ case HexagonSubtarget::V60:
return CallerSavedRegsV4;
}
llvm_unreachable(
@@ -84,6 +86,8 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
switch (MF->getSubtarget<HexagonSubtarget>().getHexagonArchVersion()) {
case HexagonSubtarget::V4:
case HexagonSubtarget::V5:
+ case HexagonSubtarget::V55:
+ case HexagonSubtarget::V60:
return CalleeSavedRegsV3;
}
llvm_unreachable("Callee saved registers requested for unknown architecture "
@@ -98,7 +102,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::R29);
Reserved.set(Hexagon::R30);
Reserved.set(Hexagon::R31);
- Reserved.set(Hexagon::D14);
+ Reserved.set(Hexagon::PC);
Reserved.set(Hexagon::D15);
Reserved.set(Hexagon::LC0);
Reserved.set(Hexagon::LC1);
@@ -116,62 +120,21 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
-
MachineBasicBlock &MB = *MI.getParent();
MachineFunction &MF = *MB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HFI = *HST.getFrameLowering();
+ unsigned BP = 0;
int FI = MI.getOperand(FIOp).getIndex();
- int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm();
- bool HasAlloca = MFI.hasVarSizedObjects();
- bool HasAlign = needsStackRealignment(MF);
-
- // XXX: Fixed objects cannot be accessed through SP if there are aligned
- // objects in the local frame, or if there are dynamically allocated objects.
- // In such cases, there has to be FP available.
- if (!HFI.hasFP(MF)) {
- assert(!HasAlloca && !HasAlign && "This function must have frame pointer");
- // We will not reserve space on the stack for the lr and fp registers.
- Offset -= 8;
- }
-
- unsigned SP = getStackRegister(), FP = getFrameRegister();
- unsigned AP = 0;
- if (MachineInstr *AI = HFI.getAlignaInstr(MF))
- AP = AI->getOperand(0).getReg();
- unsigned FrameSize = MFI.getStackSize();
-
- // Special handling of dbg_value instructions and INLINEASM.
- if (MI.isDebugValue() || MI.isInlineAsm()) {
- MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/);
- MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize);
- return;
- }
-
- bool UseFP = false, UseAP = false; // Default: use SP.
- if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
- UseFP = HasAlloca || HasAlign;
- } else {
- if (HasAlloca) {
- if (HasAlign)
- UseAP = true;
- else
- UseFP = true;
- }
- }
+ // Select the base pointer (BP) and calculate the actual offset from BP
+ // to the beginning of the object at index FI.
+ int Offset = HFI.getFrameIndexReference(MF, FI, BP);
+ // Add the offset from the instruction.
+ int RealOffset = Offset + MI.getOperand(FIOp+1).getImm();
unsigned Opc = MI.getOpcode();
- bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset);
- bool ValidFP = HII.isValidOffset(Opc, Offset);
-
- // Calculate the actual offset in the instruction.
- int64_t RealOffset = Offset;
- if (!UseFP && !UseAP)
- RealOffset = FrameSize+Offset;
-
switch (Opc) {
case Hexagon::TFR_FIA:
MI.setDesc(HII.get(Hexagon::A2_addi));
@@ -184,20 +147,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
break;
}
- unsigned BP = 0;
- bool Valid = false;
- if (UseFP) {
- BP = FP;
- Valid = ValidFP;
- } else if (UseAP) {
- BP = AP;
- Valid = ValidFP;
- } else {
- BP = SP;
- Valid = ValidSP;
- }
-
- if (Valid) {
+ if (HII.isValidOffset(Opc, RealOffset)) {
MI.getOperand(FIOp).ChangeToRegister(BP, false);
MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset);
return;
@@ -223,8 +173,8 @@ unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
&MF) const {
const HexagonFrameLowering *TFI = getFrameLowering(MF);
if (TFI->hasFP(MF))
- return Hexagon::R30;
- return Hexagon::R29;
+ return getFrameRegister();
+ return getStackRegister();
}
@@ -238,17 +188,9 @@ unsigned HexagonRegisterInfo::getStackRegister() const {
}
-bool
-HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
- const HexagonFrameLowering *TFI = getFrameLowering(MF);
- return TFI->hasFP(MF);
-}
-
-
-bool
-HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getMaxAlignment() > 8;
+bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF)
+ const {
+ return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF);
}
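The deleted block above used to choose the base register inline; after the change that decision presumably lives in HexagonFrameLowering::getFrameIndexReference, which hands back both the base register and the offset to the object. Distilled from the removed code, the choice amounts to the following sketch (pickBase is a hypothetical helper; SP is the stack pointer, FP the frame pointer, AP the aligned-area base from the aligna instruction):

// Decision table recovered from the deleted lines above; shown only to
// summarize which base register serves a given frame object.
static unsigned pickBase(bool FixedObject, bool HasAlloca, bool HasAlign,
                         unsigned SP, unsigned FP, unsigned AP) {
  if (FixedObject)
    return (HasAlloca || HasAlign) ? FP : SP;  // fixed objects need FP once SP can move
  if (HasAlloca)
    return HasAlign ? AP : FP;                 // dynamic allocas hide the SP-relative offset
  return SP;                                   // default: address relative to SP
}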
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 7edefee..db7e0f2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -63,8 +63,6 @@ public:
return true;
}
- bool needsStackRealignment(const MachineFunction &MF) const override;
-
/// Returns true if the frame pointer is valid.
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index edf1c25..81629dc 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -53,6 +53,12 @@ let Namespace = "Hexagon" in {
let Num = num;
}
+
+ // Rq - vector predicate registers
+ class Rq<bits<3> num, string n> : Register<n, []> {
+ let HWEncoding{2-0} = num;
+ }
+
// Rc - control registers
class Rc<bits<5> num, string n,
list<string> alt = [], list<Register> alias = []> :
@@ -131,20 +137,21 @@ let Namespace = "Hexagon" in {
def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
DwarfRegNum<[71]>;
- def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[72]>;
- def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[73]>;
+ def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use
+ def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>;
+ def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>;
- def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[74]> {
+ def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> {
let SubRegIndices = [subreg_overflow];
let SubRegs = [USR_OVF];
}
- def PC : Rc<9, "pc">, DwarfRegNum<[75]>;
- def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[76]>;
- def GP : Rc<11, "gp">, DwarfRegNum<[77]>;
- def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[78]>;
- def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[79]>;
- def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[80]>;
- def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[81]>;
+ def PC : Rc<9, "pc">, DwarfRegNum<[76]>;
+ def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
+ def GP : Rc<11, "gp">, DwarfRegNum<[78]>;
+ def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
+ def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
+ def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
+ def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
}
// Control registers pairs.
@@ -158,6 +165,36 @@ let Namespace = "Hexagon" in {
def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>;
}
+ foreach i = 0-31 in {
+ def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>;
+ }
+
+ // Aliases of the V* registers used to hold double vec values.
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>;
+ def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>;
+ def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>;
+ def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>;
+ def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>;
+ def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>;
+ def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>;
+ def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>;
+ def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>;
+ def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>;
+ def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>;
+ def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>;
+ def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>;
+ def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>;
+ def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>;
+ def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>;
+ }
+
+ // Vector Predicate registers.
+ def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>;
+ def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>;
+ def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>;
+ def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>;
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -169,10 +206,34 @@ def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
R10, R11, R29, R30, R31)> {
}
+// Registers are listed in reverse order for allocation preference reasons.
+def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
+ (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
+
def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4),
(sequence "D%u", 6, 13), D5, D14, D15)>;
+def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512,
+ (add (sequence "V%u", 0, 31))>;
+
+def VecDblRegs : RegisterClass<"Hexagon",
+ [v128i8, v64i16, v32i32, v16i64], 1024,
+ (add (sequence "W%u", 0, 15))>;
+
+def VectorRegs128B : RegisterClass<"Hexagon",
+ [v128i8, v64i16, v32i32, v16i64], 1024,
+ (add (sequence "V%u", 0, 31))>;
+
+def VecDblRegs128B : RegisterClass<"Hexagon",
+ [v256i8,v128i16,v64i32,v32i64], 2048,
+ (add (sequence "W%u", 0, 15))>;
+
+def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512,
+ (add (sequence "Q%u", 0, 3))>;
+
+def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024,
+ (add (sequence "Q%u", 0, 3))>;
def PredRegs : RegisterClass<"Hexagon",
[i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32,
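The new HVX register file adds thirty-two 512-bit vector registers V0-V31, their pair aliases W0-W15, and four vector predicate registers Q0-Q3; the plain classes describe 64-byte mode, and the *128B classes describe 128-byte mode at twice the width. A couple of size sanity checks implied by the value types above:

// 64-byte mode: v16i32 / v512i1 are 512 bits; 128-byte mode doubles that;
// a W register always spans a pair of V registers.
static_assert(16 * 32 == 512,  "VectorRegs element types are 512 bits (64 bytes)");
static_assert(32 * 32 == 1024, "VectorRegs128B element types are 1024 bits (128 bytes)");
static_assert(2 * 512 == 1024, "a VecDblRegs pair covers two 512-bit V registers");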
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
deleted file mode 100644
index 7069ad3..0000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends //
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Pass that removes sign extends for function parameters. These parameters
-// are already sign extended by the caller per Hexagon's ABI
-//
-//===----------------------------------------------------------------------===//
-
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-namespace llvm {
- FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
- void initializeHexagonRemoveExtendArgsPass(PassRegistry&);
-}
-
-namespace {
- struct HexagonRemoveExtendArgs : public FunctionPass {
- public:
- static char ID;
- HexagonRemoveExtendArgs() : FunctionPass(ID) {
- initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
-
- const char *getPassName() const override {
- return "Remove sign extends";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addPreserved<StackProtector>();
- FunctionPass::getAnalysisUsage(AU);
- }
- };
-}
-
-char HexagonRemoveExtendArgs::ID = 0;
-
-INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs",
- "Remove Sign and Zero Extends for Args", false, false)
-
-bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
- unsigned Idx = 1;
- for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
- ++AI, ++Idx) {
- if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
- Argument* Arg = AI;
- if (!isa<PointerType>(Arg->getType())) {
- for (auto UI = Arg->user_begin(); UI != Arg->user_end();) {
- if (isa<SExtInst>(*UI)) {
- Instruction* I = cast<Instruction>(*UI);
- SExtInst* SI = new SExtInst(Arg, I->getType());
- assert (EVT::getEVT(SI->getType()) ==
- (EVT::getEVT(I->getType())));
- ++UI;
- I->replaceAllUsesWith(SI);
- Instruction* First = F.getEntryBlock().begin();
- SI->insertBefore(First);
- I->eraseFromParent();
- } else {
- ++UI;
- }
- }
- }
- }
- }
- return true;
-}
-
-
-
-FunctionPass*
-llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) {
- return new HexagonRemoveExtendArgs();
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
index 528cafc..6e4987b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSchedule.td
@@ -13,6 +13,12 @@
include "HexagonScheduleV4.td"
+// V55 Machine Info +
+include "HexagonScheduleV55.td"
+
//===----------------------------------------------------------------------===//
-// V4 Machine Info -
+// V60 Machine Info -
//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV60.td"
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
index a7d2d47..67af147 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -35,10 +35,11 @@ def SLOT_ENDLOOP: FuncUnit;
// Itinerary classes.
def PSEUDO : InstrItinClass;
-def PSEUDOM : InstrItinClass;
+def PSEUDOM : InstrItinClass;
// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
def DUPLEX : InstrItinClass;
def PREFIX : InstrItinClass;
+def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass;
def COMPOUND : InstrItinClass;
def ALU32_2op_tc_1_SLOT0123 : InstrItinClass;
@@ -58,6 +59,7 @@ def CR_tc_2early_SLOT3 : InstrItinClass;
def CR_tc_3x_SLOT23 : InstrItinClass;
def CR_tc_3x_SLOT3 : InstrItinClass;
def J_tc_2early_SLOT23 : InstrItinClass;
+def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
def J_tc_2early_SLOT2 : InstrItinClass;
def LD_tc_ld_SLOT01 : InstrItinClass;
def LD_tc_ld_SLOT0 : InstrItinClass;
@@ -91,6 +93,7 @@ def V4LDST_tc_st_SLOT0 : InstrItinClass;
def V4LDST_tc_st_SLOT01 : InstrItinClass;
def J_tc_2early_SLOT0123 : InstrItinClass;
def EXTENDER_tc_1_SLOT0123 : InstrItinClass;
+def S_3op_tc_3stall_SLOT23 : InstrItinClass;
def HexagonItinerariesV4 :
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
new file mode 100644
index 0000000..d9ad25d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -0,0 +1,170 @@
+//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V55 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+def CJ_tc_1_SLOT23 : InstrItinClass;
+def CJ_tc_2early_SLOT23 : InstrItinClass;
+def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass;
+def COPROC_VX_vtc_long_SLOT23 : InstrItinClass;
+def COPROC_VX_vtc_SLOT23 : InstrItinClass;
+def J_tc_3stall_SLOT2 : InstrItinClass;
+def MAPPING_tc_1_SLOT0123 : InstrItinClass;
+def M_tc_3stall_SLOT23 : InstrItinClass;
+def SUBINSN_tc_1_SLOT01 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT0 : InstrItinClass;
+def SUBINSN_tc_2early_SLOT01 : InstrItinClass;
+def SUBINSN_tc_3stall_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT0 : InstrItinClass;
+def SUBINSN_tc_ld_SLOT01 : InstrItinClass;
+def SUBINSN_tc_st_SLOT01 : InstrItinClass;
+
+def HexagonItinerariesV55 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Subinsn
+ InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_2early_SLOT01,
+ [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>,
+
+ // Vector
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Misc
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>
+
+ ]>;
+
+def HexagonModelV55 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV55;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V55 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
new file mode 100644
index 0000000..2ccff82
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -0,0 +1,310 @@
+//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec".
+def CVI_ST : FuncUnit;
+def CVI_XLANE : FuncUnit;
+def CVI_SHIFT : FuncUnit;
+def CVI_MPY0 : FuncUnit;
+def CVI_MPY1 : FuncUnit;
+def CVI_LD : FuncUnit;
+
+// Combined functional units.
+def CVI_XLSHF : FuncUnit;
+def CVI_MPY01 : FuncUnit;
+def CVI_ALL : FuncUnit;
+
+// Combined functional unit data.
+def HexagonComboFuncsV60 :
+ ComboFuncUnits<[
+ ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>,
+ ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>,
+ ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1, CVI_LD]>
+ ]>;
+
+// Note: When adding additional vector scheduling classes, add the
+// corresponding methods to the class HexagonInstrInfo.
+def CVI_VA : InstrItinClass;
+def CVI_VA_DV : InstrItinClass;
+def CVI_VX_LONG : InstrItinClass;
+def CVI_VX_LATE : InstrItinClass;
+def CVI_VX : InstrItinClass;
+def CVI_VX_DV_LONG : InstrItinClass;
+def CVI_VX_DV : InstrItinClass;
+def CVI_VX_DV_SLOT2 : InstrItinClass;
+def CVI_VP : InstrItinClass;
+def CVI_VP_LONG : InstrItinClass;
+def CVI_VP_VS_EARLY : InstrItinClass;
+def CVI_VP_VS_LONG_EARLY : InstrItinClass;
+def CVI_VP_VS_LONG : InstrItinClass;
+def CVI_VP_VS : InstrItinClass;
+def CVI_VP_DV : InstrItinClass;
+def CVI_VS : InstrItinClass;
+def CVI_VINLANESAT : InstrItinClass;
+def CVI_VM_LD : InstrItinClass;
+def CVI_VM_TMP_LD : InstrItinClass;
+def CVI_VM_CUR_LD : InstrItinClass;
+def CVI_VM_VP_LDU : InstrItinClass;
+def CVI_VM_ST : InstrItinClass;
+def CVI_VM_NEW_ST : InstrItinClass;
+def CVI_VM_STU : InstrItinClass;
+def CVI_HIST : InstrItinClass;
+def CVI_VA_EXT : InstrItinClass;
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V60 machine.
+// This file describes that machine information.
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+//
+//
+// In addition to using the above SLOTS, there are also six vector pipelines
+// in the CVI co-processor in the Hexagon V60 machine.
+//
+// |=========| |=========| |=========| |=========| |=========| |=========|
+// SLOT | CVI_LD | |CVI_MPY3 | |CVI_MPY2 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST |
+// ==== |=========| |=========| |=========| |=========| |=========| |=========|
+// S0-3 | | | CVI_VA | | CVI_VA | | CVI_VA | | CVI_VA | | |
+// S2-3 | | | CVI_VX | | CVI_VX | | | | | | |
+// S0-3 | | | | | | | | | CVI_VP | | |
+// S0-3 | | | | | | | CVI_VS | | | | |
+// S0-1 |(CVI_LD) | | CVI_LD | | CVI_LD | | CVI_LD | | CVI_LD | | |
+// S0-1 |(C*TMP_LD) | | | | | | | | | |
+// S01 |(C*_LDU) | | | | | | | | C*_LDU | | |
+// S0 | | | CVI_ST | | CVI_ST | | CVI_ST | | CVI_ST | |(CVI_ST) |
+// S0 | | | | | | | | | | |(C*TMP_ST)
+// S01 | | | | | | | | | VSTU | |(C*_STU) |
+// |=========| |=========| |=========| |=========| |=========| |=========|
+// |=====================| |=====================|
+// | CVI_MPY2 & CVI_MPY3 | |CVI_XLANE & CVI_SHIFT|
+// |=====================| |=====================|
+// S0-3 | CVI_VA_DV | | CVI_VA_DV |
+// S0-3 | | | CVI_VP_DV |
+// S2-3 | CVI_VX_DV | | |
+// |=====================| |=====================|
+// |=====================================================================|
+// S0-3 | CVI_HIST Histogram |
+// S0123| CVI_VA_EXT Extract |
+// |=====================================================================|
+
+def HexagonItinerariesV60 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<2, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<2, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<3, [SLOT3]>]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<2, [SLOT2]>]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<3, [SLOT2]>]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1,
+ [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Subinsn
+ InstrItinData<SUBINSN_tc_2early_SLOT0, [InstrStage<2, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
+ InstrItinData<SUBINSN_tc_1_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_2early_SLOT01,
+ [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<SUBINSN_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60.
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<2, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<2, [SLOT_ENDLOOP]>]>,
+
+ // Vector
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23 ,
+ [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // Duplex and Compound
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ // Misc
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // Latest CVI spec definitions.
+ InstrItinData<CVI_VA,[InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE,CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VA_DV,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX,[InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VX_DV_LONG,
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_DV,
+ [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VX_DV_SLOT2,
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>]>,
+ InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VP_VS_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS_LONG,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_VS_LONG_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VP_DV , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>]>,
+ InstrItinData<CVI_VS,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>]>,
+ InstrItinData<CVI_VINLANESAT,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>]>,
+ InstrItinData<CVI_VM_LD , [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_TMP_LD,[InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>]>,
+ InstrItinData<CVI_VM_CUR_LD,[InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_VP_LDU,[InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_VM_ST , [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>]>,
+ InstrItinData<CVI_VM_NEW_ST,[InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>]>,
+ InstrItinData<CVI_VM_STU , [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>]>,
+ InstrItinData<CVI_HIST , [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>]>
+ ]>;
+
+def HexagonModelV60 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV60;
+ let LoadLatency = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V60 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 276cc69..239dbda 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -12,12 +12,11 @@
//===----------------------------------------------------------------------===//
#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
#define DEBUG_TYPE "hexagon-selectiondag-info"
-bool llvm::flag_aligned_memcpy;
-
SDValue
HexagonSelectionDAGInfo::
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
@@ -25,15 +24,40 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
- flag_aligned_memcpy = false;
- if ((Align & 0x3) == 0) {
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (ConstantSize) {
- uint64_t SizeVal = ConstantSize->getZExtValue();
- if ((SizeVal > 32) && ((SizeVal % 8) == 0))
- flag_aligned_memcpy = true;
- }
- }
-
- return SDValue();
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize)
+ return SDValue();
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (SizeVal < 32 || (SizeVal % 8) != 0)
+ return SDValue();
+
+ // Special case aligned memcpys whose size is >= 32 bytes and a multiple of 8.
+ //
+ const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+ Entry.Node = Src;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+
+ const char *SpecialMemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getTargetExternalSymbol(
+ SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args), 0)
+ .setDiscardResult();
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ return CallResult.second;
}
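For a sense of when this path fires, here is a minimal, purely illustrative C++ snippet (the helper name copyBlock is made up, not part of the patch): a constant-size copy of at least 32 bytes, a multiple of 8, through 4-byte-aligned pointers satisfies the checks above and would be emitted as a call to __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes rather than expanded inline.

#include <cstdint>
#include <cstring>

// Illustration only: 40 is a constant size >= 32 and a multiple of 8, and
// uint32_t pointers give the 4-byte alignment the (Align & 0x3) == 0 test needs.
void copyBlock(uint32_t *Dst, const uint32_t *Src) {
  std::memcpy(Dst, Src, 40);
}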
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index d3eb56f..10fe606 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -81,7 +81,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
MBBb != MBBe; ++MBBb) {
- MachineBasicBlock* MBB = MBBb;
+ MachineBasicBlock *MBB = &*MBBb;
// Traverse the basic block
MachineBasicBlock::iterator MII = MBB->begin();
MachineBasicBlock::iterator MIE = MBB->end ();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
new file mode 100644
index 0000000..d4e95b0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -0,0 +1,1209 @@
+//===--- HexagonSplitDouble.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hsdr"
+
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonSplitDoubleRegs();
+ void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
+}
+
+namespace {
+ static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
+ cl::desc("Maximum number of split partitions"));
+ static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
+ cl::desc("Do not split loads or stores"));
+
+ class HexagonSplitDoubleRegs : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr),
+ TII(nullptr) {
+ initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry());
+ }
+ const char *getPassName() const override {
+ return "Hexagon Split Double Registers";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ static const TargetRegisterClass *const DoubleRC;
+
+ const HexagonRegisterInfo *TRI;
+ const HexagonInstrInfo *TII;
+ const MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+
+ typedef std::set<unsigned> USet;
+ typedef std::map<unsigned,USet> UUSetMap;
+ typedef std::pair<unsigned,unsigned> UUPair;
+ typedef std::map<unsigned,UUPair> UUPairMap;
+ typedef std::map<const MachineLoop*,USet> LoopRegMap;
+
+ bool isInduction(unsigned Reg, LoopRegMap &IRM) const;
+ bool isVolatileInstr(const MachineInstr *MI) const;
+ bool isFixedInstr(const MachineInstr *MI) const;
+ void partitionRegisters(UUSetMap &P2Rs);
+ int32_t profit(const MachineInstr *MI) const;
+ bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
+
+ void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
+ void collectIndRegs(LoopRegMap &IRM);
+
+ void createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR);
+ void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitCombine(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitExt(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitShift(MachineInstr *MI, const UUPairMap &PairMap);
+ void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap);
+ void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap);
+ void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap);
+ bool splitPartition(const USet &Part);
+
+ static int Counter;
+ static void dump_partition(raw_ostream&, const USet&,
+ const TargetRegisterInfo&);
+ };
+ char HexagonSplitDoubleRegs::ID;
+ int HexagonSplitDoubleRegs::Counter = 0;
+ const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC
+ = &Hexagon::DoubleRegsRegClass;
+}
+
+INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
+ "Hexagon Split Double Registers", false, false)
+
+
+static inline uint32_t getRegState(const MachineOperand &R) {
+ assert(R.isReg());
+ return getDefRegState(R.isDef()) |
+ getImplRegState(R.isImplicit()) |
+ getKillRegState(R.isKill()) |
+ getDeadRegState(R.isDead()) |
+ getUndefRegState(R.isUndef()) |
+ getInternalReadRegState(R.isInternalRead()) |
+ (R.isDebug() ? RegState::Debug : 0);
+}
+
+
+void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
+ const USet &Part, const TargetRegisterInfo &TRI) {
+ dbgs() << '{';
+ for (auto I : Part)
+ dbgs() << ' ' << PrintReg(I, &TRI);
+ dbgs() << " }";
+}
+
+
+bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
+ for (auto I : IRM) {
+ const USet &Rs = I.second;
+ if (Rs.find(Reg) != Rs.end())
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
+ for (auto &I : MI->memoperands())
+ if (I->isVolatile())
+ return true;
+ return false;
+}
+
+
+bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
+ if (MI->mayLoad() || MI->mayStore())
+ if (MemRefsFixed || isVolatileInstr(MI))
+ return true;
+ if (MI->isDebugValue())
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ default:
+ return true;
+
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY:
+ break;
+
+ case Hexagon::L2_loadrd_io:
+ // Not handling stack loads (only reg-based addresses).
+ if (MI->getOperand(1).isReg())
+ break;
+ return true;
+ case Hexagon::S2_storerd_io:
+ // Not handling stack stores (only reg-based addresses).
+ if (MI->getOperand(0).isReg())
+ break;
+ return true;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineir:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A2_combinew:
+ case Hexagon::CONST64_Int_Real:
+
+ case Hexagon::A2_sxtw:
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ case Hexagon::S2_asl_i_p_or:
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ break;
+ }
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ }
+ return false;
+}
+
+
+void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
+ typedef std::map<unsigned,unsigned> UUMap;
+ typedef std::vector<unsigned> UVect;
+
+ unsigned NumRegs = MRI->getNumVirtRegs();
+ BitVector DoubleRegs(NumRegs);
+ for (unsigned i = 0; i < NumRegs; ++i) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(i);
+ if (MRI->getRegClass(R) == DoubleRC)
+ DoubleRegs.set(i);
+ }
+
+ BitVector FixedRegs(NumRegs);
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ MachineInstr *DefI = MRI->getVRegDef(R);
+ // In some cases a register may exist, but never be defined or used.
+ // It should never appear anywhere, but mark it as "fixed", just to be
+ // safe.
+ if (!DefI || isFixedInstr(DefI))
+ FixedRegs.set(x);
+ }
+
+ UUSetMap AssocMap;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ if (FixedRegs[x])
+ continue;
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ DEBUG(dbgs() << PrintReg(R, TRI) << " ~~");
+ USet &Asc = AssocMap[R];
+ for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
+ U != Z; ++U) {
+ MachineOperand &Op = *U;
+ MachineInstr *UseI = Op.getParent();
+ if (isFixedInstr(UseI))
+ continue;
+ for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) {
+ MachineOperand &MO = UseI->getOperand(i);
+ // Skip non-registers or registers with subregisters.
+ if (&MO == &Op || !MO.isReg() || MO.getSubReg())
+ continue;
+ unsigned T = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(T)) {
+ FixedRegs.set(x);
+ continue;
+ }
+ if (MRI->getRegClass(T) != DoubleRC)
+ continue;
+ unsigned u = TargetRegisterInfo::virtReg2Index(T);
+ if (FixedRegs[u])
+ continue;
+ DEBUG(dbgs() << ' ' << PrintReg(T, TRI));
+ Asc.insert(T);
+ // Make it symmetric.
+ AssocMap[T].insert(R);
+ }
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
+ UUMap R2P;
+ unsigned NextP = 1;
+ USet Visited;
+ for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) {
+ unsigned R = TargetRegisterInfo::index2VirtReg(x);
+ if (Visited.count(R))
+ continue;
+ // Create a new partition for R.
+ unsigned ThisP = FixedRegs[x] ? 0 : NextP++;
+ UVect WorkQ;
+ WorkQ.push_back(R);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ unsigned T = WorkQ[i];
+ if (Visited.count(T))
+ continue;
+ R2P[T] = ThisP;
+ Visited.insert(T);
+ // Add all registers associated with T.
+ USet &Asc = AssocMap[T];
+ for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J)
+ WorkQ.push_back(*J);
+ }
+ }
+
+ for (auto I : R2P)
+ P2Rs[I.second].insert(I.first);
+}
+
+
+static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
+ int32_t P = 0;
+ bool LoZ1 = false, HiZ1 = false;
+ if (Lo == 0 || Lo == 0xFFFFFFFF)
+ P += 10, LoZ1 = true;
+ if (Hi == 0 || Hi == 0xFFFFFFFF)
+ P += 10, HiZ1 = true;
+ if (!LoZ1 && !HiZ1 && Lo == Hi)
+ P += 3;
+ return P;
+}
+
+
+int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
+ unsigned ImmX = 0;
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ for (const auto &Op : MI->operands())
+ if (!Op.getSubReg())
+ return 0;
+ return 10;
+ case TargetOpcode::COPY:
+ if (MI->getOperand(1).getSubReg() != 0)
+ return 10;
+ return 0;
+
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::S2_storerd_io:
+ return -1;
+ case Hexagon::L2_loadrd_pi:
+ case Hexagon::S2_storerd_pi:
+ return 2;
+
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST64_Int_Real: {
+ uint64_t D = MI->getOperand(1).getImm();
+ unsigned Lo = D & 0xFFFFFFFFULL;
+ unsigned Hi = D >> 32;
+ return profitImm(Lo, Hi);
+ }
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ return profitImm(MI->getOperand(1).getImm(),
+ MI->getOperand(2).getImm());
+ case Hexagon::A4_combineri:
+ ImmX++;
+ case Hexagon::A4_combineir: {
+ ImmX++;
+ int64_t V = MI->getOperand(ImmX).getImm();
+ if (V == 0 || V == -1)
+ return 10;
+ // Fall through into A2_combinew.
+ }
+ case Hexagon::A2_combinew:
+ return 2;
+
+ case Hexagon::A2_sxtw:
+ return 3;
+
+ case Hexagon::A2_andp:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xorp:
+ return 1;
+
+ case Hexagon::S2_asl_i_p_or: {
+ unsigned S = MI->getOperand(3).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ return -1;
+ }
+ case Hexagon::S2_asl_i_p:
+ case Hexagon::S2_asr_i_p:
+ case Hexagon::S2_lsr_i_p:
+ unsigned S = MI->getOperand(2).getImm();
+ if (S == 0 || S == 32)
+ return 10;
+ if (S == 16)
+ return 5;
+ if (S == 48)
+ return 7;
+ return -10;
+ }
+
+ return 0;
+}
+
+
+bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
+ const {
+ unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
+ int32_t TotalP = 0;
+
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ int32_t P = profit(DefI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ // Reduce the profitability of splitting induction registers.
+ if (isInduction(DR, IRM))
+ TotalP -= 30;
+
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U) {
+ MachineInstr *UseI = U->getParent();
+ if (isFixedInstr(UseI)) {
+ FixedNum++;
+ // Calculate the cost of generating REG_SEQUENCE instructions.
+ for (auto &Op : UseI->operands()) {
+ if (Op.isReg() && Part.count(Op.getReg()))
+ if (Op.getSubReg())
+ TotalP -= 2;
+ }
+ continue;
+ }
+ // If a register from this partition is used in a fixed instruction,
+ // and there is also a register in this partition that is used in
+ // a loop phi node, then decrease the splitting profit as this can
+ // confuse the modulo scheduler.
+ if (UseI->isPHI()) {
+ const MachineBasicBlock *PB = UseI->getParent();
+ const MachineLoop *L = MLI->getLoopFor(PB);
+ if (L && L->getHeader() == PB)
+ LoopPhiNum++;
+ }
+ // Splittable instruction.
+ SplitNum++;
+ int32_t P = profit(UseI);
+ if (P == INT_MIN)
+ return false;
+ TotalP += P;
+ }
+ }
+
+ if (FixedNum > 0 && LoopPhiNum > 0)
+ TotalP -= 20*LoopPhiNum;
+
+ DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
+ return TotalP > 0;
+}
+
+
+void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
+ USet &Rs) {
+ const MachineBasicBlock *HB = L->getHeader();
+ const MachineBasicBlock *LB = L->getLoopLatch();
+ if (!HB || !LB)
+ return;
+
+ // Examine the latch branch. Expect it to be a conditional branch to
+ // the header (either "br-cond header" or "br-cond exit; br header").
+ MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB);
+ SmallVector<MachineOperand,2> Cond;
+ bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false);
+ // Only analyzable conditional branches. HII::AnalyzeBranch will put
+ // the branch opcode as the first element of Cond, and the predicate
+ // operand as the second.
+ if (BadLB || Cond.size() != 2)
+ return;
+ // Only simple jump-conditional (with or without negation).
+ if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm()))
+ return;
+ // Must go to the header.
+ if (TB != HB && FB != HB)
+ return;
+ assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch");
+ // Expect a predicate register.
+ unsigned PR = Cond[1].getReg();
+ assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
+
+ // Get the registers on which the loop controlling compare instruction
+ // depends.
+ unsigned CmpR1 = 0, CmpR2 = 0;
+ const MachineInstr *CmpI = MRI->getVRegDef(PR);
+ while (CmpI->getOpcode() == Hexagon::C2_not)
+ CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
+
+ int Mask = 0, Val = 0;
+ bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val);
+ if (!OkCI)
+ return;
+ // Eliminate non-double input registers.
+ if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC)
+ CmpR1 = 0;
+ if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC)
+ CmpR2 = 0;
+ if (!CmpR1 && !CmpR2)
+ return;
+
+ // Now examine the top of the loop: the phi nodes that could potentially
+ // define loop induction registers. The registers defined by such a phi
+ // node would be used in a 64-bit add, which then would be used in the
+ // loop compare instruction.
+
+ // Get the set of all double registers defined by phi nodes in the
+ // loop header.
+ typedef std::vector<unsigned> UVect;
+ UVect DP;
+ for (auto &MI : *HB) {
+ if (!MI.isPHI())
+ break;
+ const MachineOperand &MD = MI.getOperand(0);
+ unsigned R = MD.getReg();
+ if (MRI->getRegClass(R) == DoubleRC)
+ DP.push_back(R);
+ }
+ if (DP.empty())
+ return;
+
+ auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool {
+ for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ const MachineInstr *UseI = I->getParent();
+ if (UseI->getOpcode() != Hexagon::A2_addp)
+ continue;
+ // Get the output from the add. If it is one of the inputs to the
+ // loop-controlling compare instruction, then R is likely an induction
+ // register.
+ unsigned T = UseI->getOperand(0).getReg();
+ if (T == CmpR1 || T == CmpR2)
+ return false;
+ }
+ return true;
+ };
+ UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp);
+ Rs.insert(DP.begin(), End);
+ Rs.insert(CmpR1);
+ Rs.insert(CmpR2);
+
+ DEBUG({
+ dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: ";
+ dump_partition(dbgs(), Rs, *TRI);
+ dbgs() << '\n';
+ });
+}
+
+
+void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
+ typedef std::vector<MachineLoop*> LoopVector;
+ LoopVector WorkQ;
+
+ for (auto I : *MLI)
+ WorkQ.push_back(I);
+ for (unsigned i = 0; i < WorkQ.size(); ++i) {
+ for (auto I : *WorkQ[i])
+ WorkQ.push_back(I);
+ }
+
+ USet Rs;
+ for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) {
+ MachineLoop *L = WorkQ[i];
+ Rs.clear();
+ collectIndRegsForLoop(L, Rs);
+ if (!Rs.empty())
+ IRM.insert(std::make_pair(L, Rs));
+ }
+}
+
+
+void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
+ const UUPairMap &PairMap, unsigned SubR) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc));
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg()) {
+ NewI->addOperand(Op);
+ continue;
+ }
+ // For register operands, set the subregister.
+ unsigned R = Op.getReg();
+ unsigned SR = Op.getSubReg();
+ bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R);
+ bool isKill = Op.isKill();
+ if (isVirtReg && MRI->getRegClass(R) == DoubleRC) {
+ isKill = false;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end()) {
+ SR = SubR;
+ } else {
+ const UUPair &P = F->second;
+ R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second;
+ SR = 0;
+ }
+ }
+ auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill,
+ Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(),
+ Op.isInternalRead());
+ NewI->addOperand(CO);
+ }
+}
+
+
+void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ bool Load = MI->mayLoad();
+ unsigned OrigOpc = MI->getOpcode();
+ bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi ||
+ OrigOpc == Hexagon::S2_storerd_pi);
+ MachineInstr *LowI, *HighI;
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ // Index of the base-address-register operand.
+ unsigned AdrX = PostInc ? (Load ? 2 : 1)
+ : (Load ? 1 : 0);
+ MachineOperand &AdrOp = MI->getOperand(AdrX);
+ unsigned RSA = getRegState(AdrOp);
+ MachineOperand &ValOp = Load ? MI->getOperand(0)
+ : (PostInc ? MI->getOperand(3)
+ : MI->getOperand(2));
+ UUPairMap::const_iterator F = PairMap.find(ValOp.getReg());
+ assert(F != PairMap.end());
+
+ if (Load) {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second)
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4);
+ } else {
+ const UUPair &P = F->second;
+ int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm();
+ LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off)
+ .addReg(P.first);
+ HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io))
+ .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg())
+ .addImm(Off+4)
+ .addReg(P.second);
+ }
+
+ if (PostInc) {
+ // Create the increment of the address register.
+ int64_t Inc = Load ? MI->getOperand(3).getImm()
+ : MI->getOperand(2).getImm();
+ MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0);
+ const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg());
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ assert(!UpdOp.getSubReg() && "Def operand with subreg");
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR)
+ .addReg(AdrOp.getReg(), RSA)
+ .addImm(Inc);
+ MRI->replaceRegWith(UpdOp.getReg(), NewR);
+ // The original instruction will be deleted later.
+ }
+
+ // Generate a new pair of memory-operands.
+ MachineFunction &MF = *B.getParent();
+ for (auto &MO : MI->memoperands()) {
+ const MachinePointerInfo &Ptr = MO->getPointerInfo();
+ unsigned F = MO->getFlags();
+ int A = MO->getAlignment();
+
+ auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A);
+ LowI->addMemOperand(MF, Tmp1);
+ auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4));
+ HighI->addMemOperand(MF, Tmp2);
+ }
+}
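Schematically (an editor's sketch in plain C++, not from the patch; loadSplit64 is a made-up name), the memory split above turns one 64-bit access at [base+Off] into two 32-bit accesses, low word at Off and high word at Off+4 on little-endian Hexagon; only the load case is shown.

#include <cstdint>
#include <cstring>

// Low half from [Base+Off] (L2_loadri_io #Off), high half from [Base+Off+4]
// (L2_loadri_io #Off+4).
void loadSplit64(const uint8_t *Base, int64_t Off, uint32_t &Lo, uint32_t &Hi) {
  std::memcpy(&Lo, Base + Off, 4);
  std::memcpy(&Hi, Base + Off + 4, 4);
}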
+
+
+void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isImm());
+ uint64_t V = Op1.getImm();
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ // The operand to A2_tfrsi can only have 32 significant bits. Immediate
+ // values in MachineOperand are stored as 64-bit integers, and so the
+ // value -1 may be represented either as 64-bit -1, or 4294967295. Both
+ // will have the 32 higher bits truncated in the end, but -1 will remain
+ // as -1, while the latter may appear to be a large unsigned value
+ // requiring a constant extender. The casting to int32_t will select the
+ // former representation. (The same reasoning applies to all 32-bit
+ // values.)
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(int32_t(V & 0xFFFFFFFFULL));
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(int32_t(V >> 32));
+}
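As a rough model of the immediate split (illustrative only; splitImm64 is not a name from the patch), the two A2_tfrsi operands are simply the low and high 32-bit halves, with the int32_t cast keeping small negative values out of constant-extender territory as the comment above explains.

#include <cstdint>
#include <utility>

// Returns {lo, hi} as the signed 32-bit values fed to the two A2_tfrsi's.
std::pair<int32_t, int32_t> splitImm64(uint64_t V) {
  int32_t Lo = int32_t(V & 0xFFFFFFFFULL);  // -1 stays -1, not 4294967295
  int32_t Hi = int32_t(V >> 32);
  return {Lo, Hi};
}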
+
+
+void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+
+ if (Op1.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
+ .addImm(Op1.getImm());
+ } else if (Op1.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second)
+ .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+
+ if (Op2.isImm()) {
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
+ .addImm(Op2.getImm());
+ } else if (Op2.isReg()) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg());
+ } else
+ llvm_unreachable("Unexpected operand");
+}
+
+
+void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ assert(Op0.isReg() && Op1.isReg());
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned RS = getRegState(Op1);
+
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg());
+ BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second)
+ .addReg(Op1.getReg(), RS, Op1.getSubReg())
+ .addImm(31);
+}
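The sign-extension split has a simple scalar reading (editor's sketch with a hypothetical helper name, sextW): the low word is a copy of the 32-bit source and the high word is its sign, replicated by an arithmetic shift right by 31.

#include <cstdint>

void sextW(int32_t Src, uint32_t &Lo, uint32_t &Hi) {
  Lo = uint32_t(Src);        // COPY of the low 32 bits
  Hi = uint32_t(Src >> 31);  // S2_asr_i_r #31: 0xFFFFFFFF if negative, else 0
}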
+
+
+void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isImm());
+ int64_t Sh64 = Op2.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ unsigned Opc = MI->getOpcode();
+ bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
+ bool Left = !Right;
+ bool Signed = (Opc == S2_asr_i_p);
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS = getRegState(Op1);
+ unsigned ShiftOpc = Left ? S2_asl_i_r
+ : (Signed ? S2_asr_i_r : S2_lsr_i_r);
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ if (S == 0) {
+ // No shift, subregister copy.
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR)
+ .addReg(Op1.getReg(), RS, HiSR);
+ } else if (S < 32) {
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+ unsigned TmpR = MRI->createVirtualRegister(IntRC);
+ // Expansion:
+ // Shift left: DR = shl R, #s
+ // LoR = shl R.lo, #s
+ // TmpR = extractu R.lo, #s, #32-s
+ // HiR = or (TmpR, asl(R.hi, #s))
+ // Shift right: DR = shr R, #s
+ // HiR = shr R.hi, #s
+ // TmpR = shr R.lo, #s
+ // LoR = insert TmpR, R.hi, #s, #32-s
+
+ // Shift left:
+ // LoR = shl R.lo, #s
+ // Shift right:
+ // TmpR = shr R.lo, #s
+
+ // Make a special case for A2_aslh and A2_asrh (they are predicable as
+ // opposed to S2_asl_i_r/S2_asr_i_r).
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S);
+
+ if (Left) {
+ // TmpR = extractu R.lo, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ // HiR = or (TmpR, asl(R.hi, #s))
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S);
+ } else {
+ // HiR = shr R.hi, #s
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR)
+ .addImm(S);
+ // LoR = insert TmpR, R.hi, #s, #32-s
+ BuildMI(B, MI, DL, TII->get(S2_insert), LoR)
+ .addReg(TmpR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(S)
+ .addImm(32-S);
+ }
+ } else if (S == 32) {
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR));
+ if (!Signed)
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ else // Must be right shift.
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ } else if (S < 64) {
+ S -= 32;
+ if (S == 16 && Left)
+ BuildMI(B, MI, DL, TII->get(A2_aslh), HiR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR);
+ else if (S == 16 && Signed)
+ BuildMI(B, MI, DL, TII->get(A2_asrh), LoR)
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR);
+ else
+ BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR))
+ .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR))
+ .addImm(S);
+
+ if (Signed)
+ BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR)
+ .addReg(Op1.getReg(), RS, HiSR)
+ .addImm(31);
+ else
+ BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR))
+ .addImm(0);
+ }
+}
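For the left-shift case with 0 < S < 32, the expansion described in the comments above corresponds to the following plain C++ sketch (shl64 is a made-up helper; the S == 0 and S == 32 cases are handled separately in the pass, which is also why this sketch assumes 0 < S < 32).

#include <cstdint>

void shl64(uint32_t Lo, uint32_t Hi, unsigned S, uint32_t &LoR, uint32_t &HiR) {
  LoR = Lo << S;                    // S2_asl_i_r on the low word
  uint32_t Carry = Lo >> (32 - S);  // S2_extractu Lo, #S, #32-S
  HiR = Carry | (Hi << S);          // S2_asl_i_r_or into the high word
}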
+
+
+void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ MachineOperand &Op1 = MI->getOperand(1);
+ MachineOperand &Op2 = MI->getOperand(2);
+ MachineOperand &Op3 = MI->getOperand(3);
+ assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm());
+ int64_t Sh64 = Op3.getImm();
+ assert(Sh64 >= 0 && Sh64 < 64);
+ unsigned S = Sh64;
+
+ UUPairMap::const_iterator F = PairMap.find(Op0.getReg());
+ assert(F != PairMap.end());
+ const UUPair &P = F->second;
+ unsigned LoR = P.first;
+ unsigned HiR = P.second;
+ using namespace Hexagon;
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned RS1 = getRegState(Op1);
+ unsigned RS2 = getRegState(Op2);
+ const TargetRegisterClass *IntRC = &IntRegsRegClass;
+
+ unsigned LoSR = subreg_loreg;
+ unsigned HiSR = subreg_hireg;
+
+ // Op0 = S2_asl_i_p_or Op1, Op2, Op3
+ // means: Op0 = or (Op1, asl(Op2, Op3))
+
+ // Expansion of
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = or (R1.lo, asl(R2.lo, #s))
+ // Tmp1 = extractu R2.lo, #s, #32-s
+ // Tmp2 = or R1.hi, Tmp1
+ // HiR = or (Tmp2, asl(R2.hi, #s))
+
+ if (S == 0) {
+ // DR = or (R1, asl(R2, #0))
+ // -> or (R1, R2)
+ // i.e. LoR = or R1.lo, R2.lo
+ // HiR = or R1.hi, R2.hi
+ BuildMI(B, MI, DL, TII->get(A2_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, HiSR);
+ } else if (S < 32) {
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S);
+ unsigned TmpR1 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1)
+ .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR)
+ .addImm(S)
+ .addImm(32-S);
+ unsigned TmpR2 = MRI->createVirtualRegister(IntRC);
+ BuildMI(B, MI, DL, TII->get(A2_or), TmpR2)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(TmpR1);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(TmpR2)
+ .addReg(Op2.getReg(), RS2, HiSR)
+ .addImm(S);
+ } else if (S == 32) {
+ // DR = or (R1, asl(R2, #32))
+ // -> or R1, R2.lo
+ // LoR = R1.lo
+ // HiR = or R1.hi, R2.lo
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(A2_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR);
+ } else if (S < 64) {
+ // DR = or (R1, asl(R2, #s))
+ //
+ // LoR = R1:lo
+ // HiR = or (R1:hi, asl(R2:lo, #s-32))
+ S -= 32;
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR)
+ .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR);
+ BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR)
+ .addReg(Op1.getReg(), RS1, HiSR)
+ .addReg(Op2.getReg(), RS2, LoSR)
+ .addImm(S);
+ }
+}
+
+
+bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ DEBUG(dbgs() << "Splitting: " << *MI);
+ bool Split = false;
+ unsigned Opc = MI->getOpcode();
+ using namespace Hexagon;
+
+ switch (Opc) {
+ case TargetOpcode::PHI:
+ case TargetOpcode::COPY: {
+ unsigned DstR = MI->getOperand(0).getReg();
+ if (MRI->getRegClass(DstR) == DoubleRC) {
+ createHalfInstr(Opc, MI, PairMap, subreg_loreg);
+ createHalfInstr(Opc, MI, PairMap, subreg_hireg);
+ Split = true;
+ }
+ break;
+ }
+ case A2_andp:
+ createHalfInstr(A2_and, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_and, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_orp:
+ createHalfInstr(A2_or, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_or, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+ case A2_xorp:
+ createHalfInstr(A2_xor, MI, PairMap, subreg_loreg);
+ createHalfInstr(A2_xor, MI, PairMap, subreg_hireg);
+ Split = true;
+ break;
+
+ case L2_loadrd_io:
+ case L2_loadrd_pi:
+ case S2_storerd_io:
+ case S2_storerd_pi:
+ splitMemRef(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_tfrpi:
+ case CONST64_Int_Real:
+ splitImmediate(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_combineii:
+ case A4_combineir:
+ case A4_combineii:
+ case A4_combineri:
+ case A2_combinew:
+ splitCombine(MI, PairMap);
+ Split = true;
+ break;
+
+ case A2_sxtw:
+ splitExt(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p:
+ case S2_asr_i_p:
+ case S2_lsr_i_p:
+ splitShift(MI, PairMap);
+ Split = true;
+ break;
+
+ case S2_asl_i_p_or:
+ splitAslOr(MI, PairMap);
+ Split = true;
+ break;
+
+ default:
+ llvm_unreachable("Instruction not splitable");
+ return false;
+ }
+
+ return Split;
+}
+
+
+void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || !Op.getSubReg())
+ continue;
+ unsigned R = Op.getReg();
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &P = F->second;
+ switch (Op.getSubReg()) {
+ case Hexagon::subreg_loreg:
+ Op.setReg(P.first);
+ break;
+ case Hexagon::subreg_hireg:
+ Op.setReg(P.second);
+ break;
+ }
+ Op.setSubReg(0);
+ }
+}
+
+
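+// For "fixed" instructions (those that will not be split), rebuild the
+// original double register from its two halves with a REG_SEQUENCE and
+// rewrite the use to refer to the rebuilt register.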
+void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
+ const UUPairMap &PairMap) {
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = MI->getDebugLoc();
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg())
+ continue;
+ UUPairMap::const_iterator F = PairMap.find(R);
+ if (F == PairMap.end())
+ continue;
+ const UUPair &Pr = F->second;
+ unsigned NewDR = MRI->createVirtualRegister(DoubleRC);
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
+ .addReg(Pr.first)
+ .addImm(Hexagon::subreg_loreg)
+ .addReg(Pr.second)
+ .addImm(Hexagon::subreg_hireg);
+ Op.setReg(NewDR);
+ }
+}
+
+
+bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
+ const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
+ typedef std::set<MachineInstr*> MISet;
+ bool Changed = false;
+
+ DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI);
+ dbgs() << '\n');
+
+ UUPairMap PairMap;
+
+ MISet SplitIns;
+ for (unsigned DR : Part) {
+ MachineInstr *DefI = MRI->getVRegDef(DR);
+ SplitIns.insert(DefI);
+
+ // Collect all instructions, including fixed ones. We won't split them,
+ // but we need to visit them again to insert the REG_SEQUENCE instructions.
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ SplitIns.insert(U->getParent());
+
+ unsigned LoR = MRI->createVirtualRegister(IntRC);
+ unsigned HiR = MRI->createVirtualRegister(IntRC);
+ DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> "
+ << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n');
+ PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
+ }
+
+ MISet Erase;
+ for (auto MI : SplitIns) {
+ if (isFixedInstr(MI)) {
+ collapseRegPairs(MI, PairMap);
+ } else {
+ bool Done = splitInstr(MI, PairMap);
+ if (Done)
+ Erase.insert(MI);
+ Changed |= Done;
+ }
+ }
+
+ for (unsigned DR : Part) {
+    // Before erasing the "double" instructions, revisit all uses of the
+    // double registers in this partition, and replace each use of a
+    // subregister with the corresponding single register.
+ MISet Uses;
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end();
+ U != W; ++U)
+ Uses.insert(U->getParent());
+ for (auto M : Uses)
+ replaceSubregUses(M, PairMap);
+ }
+
+ for (auto MI : Erase) {
+ MachineBasicBlock *B = MI->getParent();
+ B->erase(MI);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "Splitting double registers in function: "
+ << MF.getName() << '\n');
+
+ auto &ST = MF.getSubtarget<HexagonSubtarget>();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+
+ UUSetMap P2Rs;
+ LoopRegMap IRM;
+
+ collectIndRegs(IRM);
+ partitionRegisters(P2Rs);
+
+ DEBUG({
+ dbgs() << "Register partitioning: (partition #0 is fixed)\n";
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ dbgs() << '#' << I->first << " -> ";
+ dump_partition(dbgs(), I->second, *TRI);
+ dbgs() << '\n';
+ }
+ });
+
+ bool Changed = false;
+ int Limit = MaxHSDR;
+
+ for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
+ if (I->first == 0)
+ continue;
+ if (Limit >= 0 && Counter >= Limit)
+ break;
+ USet &Part = I->second;
+ DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n');
+ if (!isProfitable(Part, IRM))
+ continue;
+ Counter++;
+ Changed |= splitPartition(Part);
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonSplitDoubleRegs() {
+ return new HexagonSplitDoubleRegs();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
new file mode 100644
index 0000000..b5339ff
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -0,0 +1,616 @@
+//===--- HexagonStoreWidening.cpp------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Replace sequences of "narrow" stores to adjacent memory locations with
+// fewer "wide" stores that have the same effect.
+// For example, replace:
+// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte
+// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte
+// with
+// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword
+// The above is the general idea. The actual cases handled by the code
+// may be a bit more complex.
+// The purpose of this pass is to reduce the number of outstanding stores,
+// or as one could say, "reduce store queue pressure". Also, wide stores
+// mean fewer stores, and since there are only two memory instructions allowed
+// per packet, it also means fewer packets, and ultimately fewer cycles.
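+// As a further, purely illustrative example (hypothetical immediate values),
+// two adjacent byte stores such as
+//   S4_storeirb_io %vreg100, 0, 18    ; mem[+0] = 0x12
+//   S4_storeirb_io %vreg100, 1, 52    ; mem[+1] = 0x34
+// can be combined (Hexagon is little-endian) into the single halfword store
+//   S4_storeirh_io %vreg100, 0, 13330 ; 0x3412
+// with the combined immediate assembled in createWideStores below.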
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-widen-stores"
+
+#include "HexagonTargetMachine.h"
+
+#include "llvm/PassSupport.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#include <algorithm>
+
+
+using namespace llvm;
+
+namespace llvm {
+ FunctionPass *createHexagonStoreWidening();
+ void initializeHexagonStoreWideningPass(PassRegistry&);
+}
+
+namespace {
+ struct HexagonStoreWidening : public MachineFunctionPass {
+ const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ AliasAnalysis *AA;
+ MachineFunction *MF;
+
+ public:
+ static char ID;
+ HexagonStoreWidening() : MachineFunctionPass(ID) {
+ initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ const char *getPassName() const override {
+ return "Hexagon Store Widening";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static bool handledStoreType(const MachineInstr *MI);
+
+ private:
+ static const int MaxWideSize = 4;
+
+ typedef std::vector<MachineInstr*> InstrGroup;
+ typedef std::vector<InstrGroup> InstrGroupList;
+
+ bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
+ bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
+ void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
+ InstrGroup::iterator End, InstrGroup &Group);
+ void createStoreGroups(MachineBasicBlock &MBB,
+ InstrGroupList &StoreGroups);
+ bool processBasicBlock(MachineBasicBlock &MBB);
+ bool processStoreGroup(InstrGroup &Group);
+ bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
+ InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
+ bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
+ bool replaceStores(InstrGroup &OG, InstrGroup &NG);
+ bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
+ };
+
+} // namespace
+
+
+namespace {
+
+// Some local helper functions...
+unsigned getBaseAddressRegister(const MachineInstr *MI) {
+ const MachineOperand &MO = MI->getOperand(0);
+ assert(MO.isReg() && "Expecting register operand");
+ return MO.getReg();
+}
+
+int64_t getStoreOffset(const MachineInstr *MI) {
+ unsigned OpC = MI->getOpcode();
+ assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
+
+ switch (OpC) {
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io: {
+ const MachineOperand &MO = MI->getOperand(1);
+ assert(MO.isImm() && "Expecting immediate offset");
+ return MO.getImm();
+ }
+ }
+ dbgs() << *MI;
+ llvm_unreachable("Store offset calculation missing for a handled opcode");
+ return 0;
+}
+
+const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
+ assert(!MI->memoperands_empty() && "Expecting memory operands");
+ return **MI->memoperands_begin();
+}
+
+} // namespace
+
+
+char HexagonStoreWidening::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexason Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+ "Hexagon Store Widening", false, false)
+
+
+// Filtering function: any stores whose opcodes are not "approved" of by
+// this function will not be subjected to widening.
+inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
+ // For now, only handle stores of immediate values.
+ // Also, reject stores to stack slots.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io:
+ // Base address must be a register. (Implement FI later.)
+ return MI->getOperand(0).isReg();
+ default:
+ return false;
+ }
+}
+
+
+// Check if the machine memory operand MMO is aliased with any of the
+// stores in the store group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+ const MachineMemOperand &MMO) {
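+  // Without an underlying IR value we cannot query alias analysis;
+  // conservatively treat the operand as aliased.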
+ if (!MMO.getValue())
+ return true;
+
+ MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
+
+ for (auto SI : Stores) {
+ const MachineMemOperand &SMO = getStoreTarget(SI);
+ if (!SMO.getValue())
+ return true;
+
+ MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
+ if (AA->alias(L, SL))
+ return true;
+ }
+
+ return false;
+}
+
+
+// Check if the machine instruction MI accesses any storage aliased with
+// any store in the group Stores.
+bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
+ const MachineInstr *MI) {
+ for (auto &I : MI->memoperands())
+ if (instrAliased(Stores, *I))
+ return true;
+ return false;
+}
+
+
+// Inspect a machine basic block, and generate store groups out of stores
+// encountered in the block.
+//
+// A store group is a group of stores that use the same base register,
+// and which can be reordered within that group without altering the
+// semantics of the program. A single store group could be widened as
+// a whole, if there existed a single store instruction with the same
+// semantics as the entire group. In many cases, a single store group
+// may need more than one wide store.
+void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
+ InstrGroupList &StoreGroups) {
+ InstrGroup AllInsns;
+
+ // Copy all instruction pointers from the basic block to a temporary
+ // list. This will allow operating on the list, and modifying its
+ // elements without affecting the basic block.
+ for (auto &I : MBB)
+ AllInsns.push_back(&I);
+
+ // Traverse all instructions in the AllInsns list, and if we encounter
+  // a store, then try to create a store group starting at that instruction,
+ // i.e. a sequence of independent stores that can be widened.
+ for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ // Skip null pointers (processed instructions).
+ if (!MI || !handledStoreType(MI))
+ continue;
+
+ // Found a store. Try to create a store group.
+ InstrGroup G;
+ createStoreGroup(MI, I+1, E, G);
+ if (G.size() > 1)
+ StoreGroups.push_back(G);
+ }
+}
+
+
+// Create a single store group. The stores need to be independent of each
+// other, and there cannot be other instructions between them that could
+// read or modify the storage being stored into.
+void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
+ InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) {
+ assert(handledStoreType(BaseStore) && "Unexpected instruction");
+ unsigned BaseReg = getBaseAddressRegister(BaseStore);
+ InstrGroup Other;
+
+ Group.push_back(BaseStore);
+
+ for (auto I = Begin; I != End; ++I) {
+ MachineInstr *MI = *I;
+ if (!MI)
+ continue;
+
+ if (handledStoreType(MI)) {
+ // If this store instruction is aliased with anything already in the
+ // group, terminate the group now.
+ if (instrAliased(Group, getStoreTarget(MI)))
+ return;
+ // If this store is aliased to any of the memory instructions we have
+ // seen so far (that are not a part of this group), terminate the group.
+ if (instrAliased(Other, getStoreTarget(MI)))
+ return;
+
+ unsigned BR = getBaseAddressRegister(MI);
+ if (BR == BaseReg) {
+ Group.push_back(MI);
+ *I = 0;
+ continue;
+ }
+ }
+
+ // Assume calls are aliased to everything.
+ if (MI->isCall() || MI->hasUnmodeledSideEffects())
+ return;
+
+ if (MI->mayLoad() || MI->mayStore()) {
+ if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
+ return;
+ Other.push_back(MI);
+ }
+ } // for
+}
+
+
+// Check if store instructions S1 and S2 are adjacent. More precisely,
+// S2 has to access memory immediately following that accessed by S1.
+bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
+ const MachineInstr *S2) {
+ if (!handledStoreType(S1) || !handledStoreType(S2))
+ return false;
+
+ const MachineMemOperand &S1MO = getStoreTarget(S1);
+
+ // Currently only handling immediate stores.
+ int Off1 = S1->getOperand(1).getImm();
+ int Off2 = S2->getOperand(1).getImm();
+
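+  // S2 is adjacent if it starts exactly where S1 ends, i.e. if
+  // Off2 == Off1 + size(S1). Compare in the signed domain when Off1 is
+  // negative.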
+ return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2)
+ : int(Off1+S1MO.getSize()) == Off2;
+}
+
+
+/// Given a sequence of adjacent stores, and a maximum size of a single wide
+/// store, pick a group of stores that can be replaced by a single store
+/// of size not exceeding MaxSize. The selected sequence will be recorded
+/// in OG ("old group" of instructions).
+/// OG should be empty on entry, and should be left empty if the function
+/// fails.
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
+ InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
+ unsigned MaxSize) {
+ assert(Begin != End && "No instructions to analyze");
+ assert(OG.empty() && "Old group not empty on entry");
+
+ if (std::distance(Begin, End) <= 1)
+ return false;
+
+ MachineInstr *FirstMI = *Begin;
+ assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
+ const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
+ unsigned Alignment = FirstMMO.getAlignment();
+ unsigned SizeAccum = FirstMMO.getSize();
+ unsigned FirstOffset = getStoreOffset(FirstMI);
+
+ // The initial value of SizeAccum should always be a power of 2.
+ assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
+
+  // If the size of the first store equals or exceeds the limit, do nothing.
+ if (SizeAccum >= MaxSize)
+ return false;
+
+  // If the size of the first store is greater than or equal to the alignment
+  // of the address stored to, then the store cannot be made any wider.
+ if (SizeAccum >= Alignment)
+ return false;
+
+ // The offset of a store will put restrictions on how wide the store can be.
+ // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
+ // If the first store already exhausts the offset limits, quit. Test this
+ // by checking if the next wider size would exceed the limit.
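+  // For example, a byte store at an odd offset cannot grow into a halfword,
+  // and a halfword store at an offset that is not a multiple of 4 cannot
+  // grow into a word.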
+ if ((2*SizeAccum-1) & FirstOffset)
+ return false;
+
+ OG.push_back(FirstMI);
+ MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
+ InstrGroup::iterator I = Begin+1;
+
+ // Pow2Num will be the largest number of elements in OG such that the sum
+ // of sizes of stores 0...Pow2Num-1 will be a power of 2.
+ unsigned Pow2Num = 1;
+ unsigned Pow2Size = SizeAccum;
+
+ // Be greedy: keep accumulating stores as long as they are to adjacent
+ // memory locations, and as long as the total number of bytes stored
+ // does not exceed the limit (MaxSize).
+ // Keep track of when the total size covered is a power of 2, since
+ // this is a size a single store can cover.
+ while (I != End) {
+ S2 = *I;
+ // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
+ // any other store to fill the "hole".
+ if (!storesAreAdjacent(S1, S2))
+ break;
+
+ unsigned S2Size = getStoreTarget(S2).getSize();
+ if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
+ break;
+
+ OG.push_back(S2);
+ SizeAccum += S2Size;
+ if (isPowerOf2_32(SizeAccum)) {
+ Pow2Num = OG.size();
+ Pow2Size = SizeAccum;
+ }
+ if ((2*Pow2Size-1) & FirstOffset)
+ break;
+
+ S1 = S2;
+ ++I;
+ }
+
+ // The stores don't add up to anything that can be widened. Clean up.
+ if (Pow2Num <= 1) {
+ OG.clear();
+ return false;
+ }
+
+  // Only keep the stores being widened.
+ OG.resize(Pow2Num);
+ TotalSize = Pow2Size;
+ return true;
+}
+
+
+/// Given an "old group" OG of stores, create a "new group" NG of instructions
+/// to replace them. Ideally, NG would only have a single instruction in it,
+/// but that may only be possible for store-immediate.
+bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
+ unsigned TotalSize) {
+ // XXX Current limitations:
+ // - only expect stores of immediate values in OG,
+ // - only handle a TotalSize of up to 4.
+
+ if (TotalSize > 4)
+ return false;
+
+ unsigned Acc = 0; // Value accumulator.
+ unsigned Shift = 0;
+
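+  // OG is sorted by offset, so packing each immediate at the current Shift
+  // position builds the combined little-endian value for the wide store.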
+ for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+ const MachineMemOperand &MMO = getStoreTarget(MI);
+ MachineOperand &SO = MI->getOperand(2); // Source.
+ assert(SO.isImm() && "Expecting an immediate operand");
+
+ unsigned NBits = MMO.getSize()*8;
+ unsigned Mask = (0xFFFFFFFFU >> (32-NBits));
+ unsigned Val = (SO.getImm() & Mask) << Shift;
+ Acc |= Val;
+ Shift += NBits;
+ }
+
+
+ MachineInstr *FirstSt = OG.front();
+ DebugLoc DL = OG.back()->getDebugLoc();
+ const MachineMemOperand &OldM = getStoreTarget(FirstSt);
+ MachineMemOperand *NewM =
+ MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
+ TotalSize, OldM.getAlignment(),
+ OldM.getAAInfo());
+
+ if (Acc < 0x10000) {
+ // Create mem[hw] = #Acc
+ unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io :
+ (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0;
+ assert(WOpc && "Unexpected size");
+
+ int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
+ const MCInstrDesc &StD = TII->get(WOpc);
+ MachineOperand &MR = FirstSt->getOperand(0);
+ int64_t Off = FirstSt->getOperand(1).getImm();
+ MachineInstr *StI = BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()))
+ .addImm(Off)
+ .addImm(Val);
+ StI->addMemOperand(*MF, NewM);
+ NG.push_back(StI);
+ } else {
+ // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
+ const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
+ const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
+ unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
+ .addImm(int(Acc));
+ NG.push_back(TfrI);
+
+ unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io :
+ (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
+ assert(WOpc && "Unexpected size");
+
+ const MCInstrDesc &StD = TII->get(WOpc);
+ MachineOperand &MR = FirstSt->getOperand(0);
+ int64_t Off = FirstSt->getOperand(1).getImm();
+ MachineInstr *StI = BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()))
+ .addImm(Off)
+ .addReg(VReg, RegState::Kill);
+ StI->addMemOperand(*MF, NewM);
+ NG.push_back(StI);
+ }
+
+ return true;
+}
+
+
+// Replace instructions from the old group OG with instructions from the
+// new group NG. Conceptually, remove all instructions in OG, and then
+// insert all instructions in NG, starting at where the first instruction
+// from OG was (in the order in which they appeared in the basic block).
+// (The ordering in OG does not have to match the order in the basic block.)
+bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
+ DEBUG({
+ dbgs() << "Replacing:\n";
+ for (auto I : OG)
+ dbgs() << " " << *I;
+ dbgs() << "with\n";
+ for (auto I : NG)
+ dbgs() << " " << *I;
+ });
+
+ MachineBasicBlock *MBB = OG.back()->getParent();
+ MachineBasicBlock::iterator InsertAt = MBB->end();
+
+ // Need to establish the insertion point. The best one is right before
+ // the first store in the OG, but in the order in which the stores occur
+ // in the program list. Since the ordering in OG does not correspond
+ // to the order in the program list, we need to do some work to find
+ // the insertion point.
+
+ // Create a set of all instructions in OG (for quick lookup).
+ SmallPtrSet<MachineInstr*, 4> InstrSet;
+ for (auto I : OG)
+ InstrSet.insert(I);
+
+ // Traverse the block, until we hit an instruction from OG.
+ for (auto &I : *MBB) {
+ if (InstrSet.count(&I)) {
+ InsertAt = I;
+ break;
+ }
+ }
+
+ assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
+
+ bool AtBBStart = false;
+
+ // InsertAt points at the first instruction that will be removed. We need
+ // to move it out of the way, so it remains valid after removing all the
+ // old stores, and so we are able to recover it back to the proper insertion
+ // position.
+ if (InsertAt != MBB->begin())
+ --InsertAt;
+ else
+ AtBBStart = true;
+
+ for (auto I : OG)
+ I->eraseFromParent();
+
+ if (!AtBBStart)
+ ++InsertAt;
+ else
+ InsertAt = MBB->begin();
+
+ for (auto I : NG)
+ MBB->insert(InsertAt, I);
+
+ return true;
+}
+
+
+// Break up the group into smaller groups, each of which can be replaced by
+// a single wide store. Widen each such smaller group and replace the old
+// instructions with the widened ones.
+bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
+ bool Changed = false;
+ InstrGroup::iterator I = Group.begin(), E = Group.end();
+ InstrGroup OG, NG; // Old and new groups.
+ unsigned CollectedSize;
+
+ while (I != E) {
+ OG.clear();
+ NG.clear();
+
+ bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) &&
+ createWideStores(OG, NG, CollectedSize) &&
+ replaceStores(OG, NG);
+ if (!Succ)
+ continue;
+
+ assert(OG.size() > 1 && "Created invalid group");
+ assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements");
+ I += OG.size()-1;
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+
+// Process a single basic block: create the store groups, and replace them
+// with the widened stores, if possible. Processing of each basic block
+// is independent of the processing of any other basic block. This
+// transformation could be stopped after having processed any basic block
+// without any ill effects (other than not having performed widening in the
+// unprocessed blocks). Also, the basic blocks can be processed in any order.
+bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
+ InstrGroupList SGs;
+ bool Changed = false;
+
+ createStoreGroups(MBB, SGs);
+
+ auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
+ return getStoreOffset(A) < getStoreOffset(B);
+ };
+ for (auto &G : SGs) {
+ assert(G.size() > 1 && "Store group with fewer than 2 elements");
+ std::sort(G.begin(), G.end(), Less);
+
+ Changed |= processStoreGroup(G);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
+ MF = &MFn;
+ auto &ST = MFn.getSubtarget<HexagonSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ MRI = &MFn.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ bool Changed = false;
+
+ for (auto &B : MFn)
+ Changed |= processBasicBlock(B);
+
+ return Changed;
+}
+
+
+FunctionPass *llvm::createHexagonStoreWidening() {
+ return new HexagonStoreWidening();
+}
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index cd482b3..aa0efd4 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -16,6 +16,8 @@
#include "HexagonRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include <map>
+
using namespace llvm;
#define DEBUG_TYPE "hexagon-subtarget"
@@ -24,49 +26,65 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"
-static cl::opt<bool>
-EnableV3("enable-hexagon-v3", cl::Hidden,
- cl::desc("Enable Hexagon V3 instructions."));
-
-static cl::opt<bool>
-EnableMemOps(
- "enable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
- cl::desc(
- "Generate V4 MEMOP in code generation for Hexagon target"));
-
-static cl::opt<bool>
-DisableMemOps(
- "disable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
- cl::desc(
- "Do not generate V4 MEMOP in code generation for Hexagon target"));
-
-static cl::opt<bool>
-EnableIEEERndNear(
- "enable-hexagon-ieee-rnd-near",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Generate non-chopped conversion from fp to int."));
+static cl::opt<bool> EnableMemOps("enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool> DisableMemOps("disable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
+ cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target"));
+
+static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Generate non-chopped conversion from fp to int."));
+
+static cl::opt<bool> EnableBSBSched("enable-bsb-sched",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
+static cl::opt<bool> EnableHexagonHVXDouble("enable-hexagon-hvx-double",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon Double Vector eXtensions"));
+
+static cl::opt<bool> EnableHexagonHVX("enable-hexagon-hvx",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon Vector eXtensions"));
static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon MI Scheduling"));
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon MI Scheduling"));
+
+void HexagonSubtarget::initializeEnvironment() {
+ UseMemOps = false;
+ ModeIEEERndNear = false;
+ UseBSBScheduling = false;
+}
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- // If the programmer has not specified a Hexagon version, default to -mv4.
- if (CPUString.empty())
- CPUString = "hexagonv4";
-
- if (CPUString == "hexagonv4") {
- HexagonArchVersion = V4;
- } else if (CPUString == "hexagonv5") {
- HexagonArchVersion = V5;
- } else {
+ CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU);
+
+ static std::map<StringRef, HexagonArchEnum> CpuTable {
+ { "hexagonv4", V4 },
+ { "hexagonv5", V5 },
+ { "hexagonv55", V55 },
+ { "hexagonv60", V60 },
+ };
+
+ auto foundIt = CpuTable.find(CPUString);
+ if (foundIt != CpuTable.end())
+ HexagonArchVersion = foundIt->second;
+ else
llvm_unreachable("Unrecognized Hexagon processor version");
- }
+ UseHVXOps = false;
+ UseHVXDblOps = false;
ParseSubtargetFeatures(CPUString, FS);
+
+ if (EnableHexagonHVX.getPosition())
+ UseHVXOps = EnableHexagonHVX;
+ if (EnableHexagonHVXDouble.getPosition())
+ UseHVXDblOps = EnableHexagonHVXDouble;
+
return *this;
}
@@ -76,6 +94,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
FrameLowering() {
+ initializeEnvironment();
+
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
@@ -91,6 +111,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
ModeIEEERndNear = true;
else
ModeIEEERndNear = false;
+
+ UseBSBScheduling = hasV60TOps() && EnableBSBSched;
}
// Pin the vtable to this file.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 34cdad7..c7ae139 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -34,15 +34,19 @@ namespace llvm {
class HexagonSubtarget : public HexagonGenSubtargetInfo {
virtual void anchor();
- bool UseMemOps;
+ bool UseMemOps, UseHVXOps, UseHVXDblOps;
bool ModeIEEERndNear;
public:
enum HexagonArchEnum {
- V4, V5
+ V4, V5, V55, V60
};
HexagonArchEnum HexagonArchVersion;
+ /// True if the target should use Back-Skip-Back scheduling. This is the
+ /// default for V60.
+ bool UseBSBScheduling;
+
private:
std::string CPUString;
HexagonInstrInfo InstrInfo;
@@ -50,6 +54,7 @@ private:
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
InstrItineraryData InstrItins;
+ void initializeEnvironment();
public:
HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
@@ -84,7 +89,16 @@ public:
bool useMemOps() const { return UseMemOps; }
bool hasV5TOps() const { return getHexagonArchVersion() >= V5; }
bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; }
+ bool hasV55TOps() const { return getHexagonArchVersion() >= V55; }
+ bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; }
+ bool hasV60TOps() const { return getHexagonArchVersion() >= V60; }
+ bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; }
bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool useHVXOps() const { return UseHVXOps; }
+ bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; }
+ bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; }
+
+ bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
// Always use the TargetLowering default scheduler.
// FIXME: This will use the vliw scheduler which is probably just hurting
@@ -98,7 +112,7 @@ public:
return Hexagon_SMALL_DATA_THRESHOLD;
}
const HexagonArchEnum &getHexagonArchVersion() const {
- return HexagonArchVersion;
+ return HexagonArchVersion;
}
};
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index b504429..9dccd69 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -16,12 +16,12 @@
#include "HexagonISelLowering.h"
#include "HexagonMachineScheduler.h"
#include "HexagonTargetObjectFile.h"
+#include "HexagonTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
@@ -33,10 +33,16 @@ static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon CFG Optimization"));
+static cl::opt<bool> DisableStoreWidening("disable-store-widen",
+ cl::Hidden, cl::init(false), cl::desc("Disable store widening"));
+
static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Early expansion of MUX"));
+static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable early if-conversion"));
+
static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true),
cl::Hidden, cl::desc("Generate \"insert\" instructions"));
@@ -46,10 +52,22 @@ static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true),
static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true),
cl::Hidden, cl::desc("Generate \"extract\" instructions"));
+static cl::opt<bool> EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden,
+ cl::desc("Enable converting conditional transfers into MUX instructions"));
+
static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true),
cl::Hidden, cl::desc("Enable conversion of arithmetic operations to "
"predicate instructions"));
+static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
+ cl::desc("Disable splitting double registers"));
+
+static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
+ cl::Hidden, cl::desc("Bit simplification"));
+
+static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true),
+ cl::Hidden, cl::desc("Loop rescheduling"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -72,23 +90,30 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
namespace llvm {
+ FunctionPass *createHexagonBitSimplify();
+ FunctionPass *createHexagonCallFrameInformation();
FunctionPass *createHexagonCFGOptimizer();
FunctionPass *createHexagonCommonGEP();
FunctionPass *createHexagonCopyToCombine();
+ FunctionPass *createHexagonEarlyIfConversion();
FunctionPass *createHexagonExpandCondsets();
FunctionPass *createHexagonExpandPredSpillCode();
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
+ FunctionPass *createHexagonGenMux();
FunctionPass *createHexagonGenPredicate();
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ FunctionPass *createHexagonLoopRescheduling();
FunctionPass *createHexagonNewValueJump();
+ FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonPeephole();
- FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM);
FunctionPass *createHexagonSplitConst32AndConst64();
+ FunctionPass *createHexagonSplitDoubleRegs();
+ FunctionPass *createHexagonStoreWidening();
} // end namespace llvm;
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
@@ -101,13 +126,46 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS,
- Options, RM, CM, OL),
- TLOF(make_unique<HexagonTargetObjectFile>()),
- Subtarget(TT, CPU, FS, *this) {
- initAsmInfo();
+ : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-"
+ "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-"
+ "n16:32", TT, CPU, FS, Options, RM, CM, OL),
+ TLOF(make_unique<HexagonTargetObjectFile>()) {
+ initAsmInfo();
+}
+
+const HexagonSubtarget *
+HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
+ AttributeSet FnAttrs = F.getAttributes();
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+
+ std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
+ ? CPUAttr.getValueAsString().str()
+ : TargetCPU;
+ std::string FS = !FSAttr.hasAttribute(Attribute::None)
+ ? FSAttr.getValueAsString().str()
+ : TargetFS;
+
+ auto &I = SubtargetMap[CPU + FS];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = llvm::make_unique<HexagonSubtarget>(TargetTriple, CPU, FS, *this);
+ }
+ return I.get();
+}
+
+TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(HexagonTTIImpl(this, F));
+ });
}
+
HexagonTargetMachine::~HexagonTargetMachine() {}
namespace {
@@ -166,7 +224,7 @@ bool HexagonPassConfig::addInstSelector() {
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
if (!NoOpt)
- addPass(createHexagonRemoveExtendArgs(TM));
+ addPass(createHexagonOptimizeSZextends());
addPass(createHexagonISelDag(TM, getOptLevel()));
@@ -174,19 +232,33 @@ bool HexagonPassConfig::addInstSelector() {
// Create logical operations on predicate registers.
if (EnableGenPred)
addPass(createHexagonGenPredicate(), false);
+ // Rotate loops to expose bit-simplification opportunities.
+ if (EnableLoopResched)
+ addPass(createHexagonLoopRescheduling(), false);
+ // Split double registers.
+ if (!DisableHSDR)
+ addPass(createHexagonSplitDoubleRegs());
+ // Bit simplification.
+ if (EnableBitSimplify)
+ addPass(createHexagonBitSimplify(), false);
addPass(createHexagonPeephole());
printAndVerify("After hexagon peephole pass");
if (EnableGenInsert)
addPass(createHexagonGenInsert(), false);
+ if (EnableEarlyIf)
+ addPass(createHexagonEarlyIfConversion(), false);
}
return false;
}
void HexagonPassConfig::addPreRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!DisableStoreWidening)
+ addPass(createHexagonStoreWidening(), false);
if (!DisableHardwareLoops)
addPass(createHexagonHardwareLoops(), false);
+ }
}
void HexagonPassConfig::addPostRegAlloc() {
@@ -215,6 +287,13 @@ void HexagonPassConfig::addPreEmitPass() {
if (!NoOpt) {
if (!DisableHardwareLoops)
addPass(createHexagonFixupHwLoops(), false);
+ // Generate MUX from pairs of conditional transfers.
+ if (EnableGenMux)
+ addPass(createHexagonGenMux(), false);
+
addPass(createHexagonPacketizer(), false);
}
+
+ // Add CFI instructions if necessary.
+ addPass(createHexagonCallFrameInformation(), false);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 115eadb..968814b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -16,6 +16,7 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
+#include "HexagonTargetObjectFile.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -24,7 +25,7 @@ class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- HexagonSubtarget Subtarget;
+ mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap;
public:
HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -32,20 +33,18 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~HexagonTargetMachine() override;
- const HexagonSubtarget *getSubtargetImpl(const Function &) const override {
- return &Subtarget;
- }
+ const HexagonSubtarget *getSubtargetImpl(const Function &F) const override;
+
static unsigned getModuleMatchQuality(const Module &M);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
- TargetLoweringObjectFile *getObjFileLowering() const override {
- return TLOF.get();
+ HexagonTargetObjectFile *getObjFileLowering() const override {
+ return static_cast<HexagonTargetObjectFile*>(TLOF.get());
}
};
-extern bool flag_aligned_memcpy;
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 4ea0e0d..ccca620 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -73,9 +73,10 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
if (!GVA)
return false;
- if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+ if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) {
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ GV->getParent()->getDataLayout().getTypeAllocSize(Ty));
}
return false;
@@ -89,7 +90,7 @@ HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
// Handle Small Section classification here.
if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallBSSSection;
- if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
new file mode 100644
index 0000000..a05443e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -0,0 +1,38 @@
+//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// Hexagon target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagontti"
+
+TargetTransformInfo::PopcntSupportKind
+HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
+ // Return Fast Hardware support as every input < 64 bits will be promoted
+ // to 64 bits.
+ return TargetTransformInfo::PSK_FastHardware;
+}
+
+// The Hexagon target can unroll loops with run-time trip counts.
+void HexagonTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ UP.Runtime = UP.Partial = true;
+}
+
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
+ return vector ? 0 : 32;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
new file mode 100644
index 0000000..71ae17a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -0,0 +1,70 @@
+//===-- HexagonTargetTransformInfo.h - Hexagon specific TTI pass ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// Hexagon target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
+ typedef BasicTTIImplBase<HexagonTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const HexagonSubtarget *ST;
+ const HexagonTargetLowering *TLI;
+
+ const HexagonSubtarget *getST() const { return ST; }
+ const HexagonTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ HexagonTTIImpl(const HexagonTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ HexagonTTIImpl(HexagonTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
+
+ // The Hexagon target can unroll loops with run-time trip counts.
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ unsigned getNumberOfRegisters(bool vector) const;
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index b91a3f6..8185054 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -16,35 +16,19 @@
// prune the dependence.
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/DFAPacketizer.h"
-#include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "HexagonVLIWPacketizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <map>
#include <vector>
@@ -52,9 +36,22 @@ using namespace llvm;
#define DEBUG_TYPE "packets"
+static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon packetizer pass"));
+
static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
- cl::ZeroOrMore, cl::Hidden, cl::init(true),
- cl::desc("Allow non-solo packetization of volatile memory references"));
+ cl::ZeroOrMore, cl::Hidden, cl::init(true),
+ cl::desc("Allow non-solo packetization of volatile memory references"));
+
+static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
+
+static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
+ cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Disable vector double new-value-stores"));
+
+extern cl::opt<bool> ScheduleInlineAsm;
namespace llvm {
FunctionPass *createHexagonPacketizer();
@@ -64,7 +61,6 @@ namespace llvm {
namespace {
class HexagonPacketizer : public MachineFunctionPass {
-
public:
static char ID;
HexagonPacketizer() : MachineFunctionPass(ID) {
@@ -73,103 +69,25 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-
const char *getPassName() const override {
return "Hexagon Packetizer";
}
-
bool runOnMachineFunction(MachineFunction &Fn) override;
- };
- char HexagonPacketizer::ID = 0;
-
- class HexagonPacketizerList : public VLIWPacketizerList {
private:
-
- // Has the instruction been promoted to a dot-new instruction.
- bool PromotedToDotNew;
-
- // Has the instruction been glued to allocframe.
- bool GlueAllocframeStore;
-
- // Has the feeder instruction been glued to new value jump.
- bool GlueToNewValueJump;
-
- // Check if there is a dependence between some instruction already in this
- // packet and this instruction.
- bool Dependence;
-
- // Only check for dependence if there are resources available to
- // schedule this instruction.
- bool FoundSequentialDependence;
-
- /// \brief A handle to the branch probability pass.
- const MachineBranchProbabilityInfo *MBPI;
-
- // Track MIs with ignored dependece.
- std::vector<MachineInstr*> IgnoreDepMIs;
-
- public:
- // Ctor.
- HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
- const MachineBranchProbabilityInfo *MBPI);
-
- // initPacketizerState - initialize some internal flags.
- void initPacketizerState() override;
-
- // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- bool ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) override;
-
- // isSoloInstruction - return true if instruction MI can not be packetized
- // with any other instruction, which means that MI itself is a packet.
- bool isSoloInstruction(MachineInstr *MI) override;
-
- // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
- // together.
- bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
-
- // isLegalToPruneDependencies - Is it legal to prune dependece between SUI
- // and SUJ.
- bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override;
-
- MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override;
- private:
- bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
- bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
- bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII,
- const TargetRegisterClass *RC);
- bool
- CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII);
- bool CanPromoteToNewValueStore(
- MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit);
- bool DemoteToDotOld(MachineInstr *MI);
- bool ArePredicatesComplements(
- MachineInstr *MI1, MachineInstr *MI2,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit);
- bool RestrictingDepExistInPacket(MachineInstr *, unsigned,
- const std::map<MachineInstr *, SUnit *> &);
- bool isNewifiable(MachineInstr* MI);
- bool isCondInst(MachineInstr* MI);
- bool tryAllocateResourcesForConstExt(MachineInstr* MI);
- bool canReserveResourcesForConstExt(MachineInstr *MI);
- void reserveResourcesForConstExt(MachineInstr* MI);
- bool isNewValueInst(MachineInstr* MI);
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
};
+
+ char HexagonPacketizer::ID = 0;
}
INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
@@ -177,26 +95,93 @@ INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
false, false)
-// HexagonPacketizerList Ctor.
-HexagonPacketizerList::HexagonPacketizerList(
- MachineFunction &MF, MachineLoopInfo &MLI,
- const MachineBranchProbabilityInfo *MBPI)
- : VLIWPacketizerList(MF, MLI, true) {
- this->MBPI = MBPI;
+HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
+ MachineLoopInfo &MLI, AliasAnalysis *AA,
+ const MachineBranchProbabilityInfo *MBPI)
+ : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
}
-bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- const MachineBranchProbabilityInfo *MBPI =
- &getAnalysis<MachineBranchProbabilityInfo>();
+// Check if FirstI modifies a register that SecondI reads.
+static bool hasWriteToReadDep(const MachineInstr *FirstI,
+ const MachineInstr *SecondI, const TargetRegisterInfo *TRI) {
+ for (auto &MO : FirstI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (SecondI->readsRegister(R, TRI))
+ return true;
+ }
+ return false;
+}
+
+
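+// Move MI out of the bundle that BundleIt points to, placing it immediately
+// before or after the bundle. If only one instruction then remains in the
+// bundle, unbundle it as well and erase the BUNDLE header. The returned
+// iterator can be used to continue scanning after the bundle.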
+static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
+ MachineBasicBlock::iterator BundleIt, bool Before) {
+ MachineBasicBlock::instr_iterator InsertPt;
+ if (Before)
+ InsertPt = BundleIt.getInstrIterator();
+ else
+ InsertPt = std::next(BundleIt).getInstrIterator();
+
+ MachineBasicBlock &B = *MI->getParent();
+ // The instruction should at least be bundled with the preceding instruction
+ // (there will always be one, i.e. BUNDLE, if nothing else).
+ assert(MI->isBundledWithPred());
+ if (MI->isBundledWithSucc()) {
+ MI->clearFlag(MachineInstr::BundledSucc);
+ MI->clearFlag(MachineInstr::BundledPred);
+ } else {
+ // If it's not bundled with the successor (i.e. it is the last one
+ // in the bundle), then we can simply unbundle it from the predecessor,
+ // which will take care of updating the predecessor's flag.
+ MI->unbundleFromPred();
+ }
+ B.splice(InsertPt, &B, MI);
+
+ // Get the size of the bundle without asserting.
+ MachineBasicBlock::const_instr_iterator I(BundleIt);
+ MachineBasicBlock::const_instr_iterator E = B.instr_end();
+ unsigned Size = 0;
+ for (++I; I != E && I->isBundledWithPred(); ++I)
+ ++Size;
+
+ // If there are still two or more instructions, then there is nothing
+ // else to be done.
+ if (Size > 1)
+ return BundleIt;
+
+ // Otherwise, extract the single instruction out and delete the bundle.
+ MachineBasicBlock::iterator NextIt = std::next(BundleIt);
+ MachineInstr *SingleI = BundleIt->getNextNode();
+ SingleI->unbundleFromPred();
+ assert(!SingleI->isBundledWithSucc());
+ BundleIt->eraseFromParent();
+ return NextIt;
+}
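(Descriptive note on the return value, inferred from the code above: if the bundle still holds two or more instructions, the original bundle iterator is returned unchanged; otherwise the bundle is dissolved, i.e. its last remaining instruction is unbundled and the BUNDLE header erased, and the iterator following the former bundle is returned so the caller can keep iterating safely.)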
+
+
+bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePacketizer)
+ return false;
+
+ HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ auto &MLI = getAnalysis<MachineLoopInfo>();
+ auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+
+ if (EnableGenAllInsnClass)
+ HII->genAllInsnTimingClasses(MF);
+
// Instantiate the packetizer.
- HexagonPacketizerList Packetizer(Fn, MLI, MBPI);
+ HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
@@ -211,162 +196,107 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
//
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock::iterator End = MBB->end();
- MachineBasicBlock::iterator MI = MBB->begin();
+ for (auto &MB : MF) {
+ auto End = MB.end();
+ auto MI = MB.begin();
while (MI != End) {
+ auto NextI = std::next(MI);
if (MI->isKill()) {
- MachineBasicBlock::iterator DeleteMI = MI;
- ++MI;
- MBB->erase(DeleteMI);
- End = MBB->end();
- continue;
+ MB.erase(MI);
+ End = MB.end();
}
- ++MI;
+ MI = NextI;
}
}
// Loop over all of the basic blocks.
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- // Find scheduling regions and schedule / packetize each region.
- unsigned RemainingCount = MBB->size();
- for(MachineBasicBlock::iterator RegionEnd = MBB->end();
- RegionEnd != MBB->begin();) {
- // The next region starts above the previous region. Look backward in the
- // instruction stream until we find the nearest boundary.
- MachineBasicBlock::iterator I = RegionEnd;
- for(;I != MBB->begin(); --I, --RemainingCount) {
- if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
- break;
- }
- I = MBB->begin();
-
- // Skip empty scheduling regions.
- if (I == RegionEnd) {
- RegionEnd = std::prev(RegionEnd);
- --RemainingCount;
- continue;
- }
- // Skip regions with one instruction.
- if (I == std::prev(RegionEnd)) {
- RegionEnd = std::prev(RegionEnd);
- continue;
- }
-
- Packetizer.PacketizeMIs(MBB, I, RegionEnd);
- RegionEnd = I;
+ for (auto &MB : MF) {
+ auto Begin = MB.begin(), End = MB.end();
+ while (Begin != End) {
+      // Find the first non-boundary starting from the end of the last
+ // scheduling region.
+ MachineBasicBlock::iterator RB = Begin;
+ while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
+ ++RB;
+      // Find the first boundary starting from the beginning of the new
+ // region.
+ MachineBasicBlock::iterator RE = RB;
+ while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
+ ++RE;
+ // Add the scheduling boundary if it's not block end.
+ if (RE != End)
+ ++RE;
+ // If RB == End, then RE == End.
+ if (RB != End)
+ Packetizer.PacketizeMIs(&MB, RB, RE);
+
+ Begin = RE;
}
}
+ Packetizer.unpacketizeSoloInstrs(MF);
return true;
}
-static bool IsIndirectCall(MachineInstr* MI) {
- return MI->getOpcode() == Hexagon::J2_callr;
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt() {
+ if (!tryAllocateResourcesForConstExt(true))
+ llvm_unreachable("Resources not available");
}
-// Reserve resources for constant extender. Trigure an assertion if
-// reservation fail.
-void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
-
- if (ResourceTracker->canReserveResources(PseudoMI)) {
- ResourceTracker->reserveResources(PseudoMI);
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
- } else {
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
- llvm_unreachable("can not reserve resources for constant extender.");
- }
- return;
+bool HexagonPacketizerList::canReserveResourcesForConstExt() {
+ return tryAllocateResourcesForConstExt(false);
}
-bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- assert((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
- "Should only be called for constant extended instructions");
- MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
- bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
- MF.DeleteMachineInstr(PseudoMI);
- return CanReserve;
+// Allocate resources (i.e. 4 bytes) for a constant extender. Return true if
+// the allocation succeeded, false otherwise.
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
+ auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
+ bool Avail = ResourceTracker->canReserveResources(ExtMI);
+ if (Reserve && Avail)
+ ResourceTracker->reserveResources(ExtMI);
+ MF.DeleteMachineInstr(ExtMI);
+ return Avail;
}
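A minimal sketch of how these helpers are meant to be combined (an assumption; the actual call sites are outside this hunk, and isConstExtended is the existing HexagonInstrInfo query seen elsewhere in this file):

    // Hypothetical caller: probe for a free extender slot, then commit it.
    if (HII->isConstExtended(MI) && canReserveResourcesForConstExt())
      reserveResourcesForConstExt();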
-// Allocate resources (i.e. 4 bytes) for constant extender. If succeed, return
-// true, otherwise, return false.
-bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext),
- MI->getDebugLoc());
- if (ResourceTracker->canReserveResources(PseudoMI)) {
- ResourceTracker->reserveResources(PseudoMI);
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
+ SDep::Kind DepType, unsigned DepReg) {
+ // Check for LR dependence.
+ if (DepReg == HRI->getRARegister())
return true;
- } else {
- MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
- return false;
- }
-}
-
-
-bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
- SDep::Kind DepType,
- unsigned DepReg) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
-
- // Check for lr dependence
- if (DepReg == QRI->getRARegister()) {
- return true;
- }
- if (QII->isDeallocRet(MI)) {
- if (DepReg == QRI->getFrameRegister() ||
- DepReg == QRI->getStackRegister())
+ if (HII->isDeallocRet(MI))
+ if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
return true;
- }
- // Check if this is a predicate dependence
- const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg);
- if (RC == &Hexagon::PredRegsRegClass) {
+ // Check if this is a predicate dependence.
+ const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
+ if (RC == &Hexagon::PredRegsRegClass)
return true;
- }
- //
- // Lastly check for an operand used in an indirect call
- // If we had an attribute for checking if an instruction is an indirect call,
- // then we could have avoided this relatively brittle implementation of
- // IsIndirectCall()
- //
- // Assumes that the first operand of the CALLr is the function address
- //
- if (IsIndirectCall(MI) && (DepType == SDep::Data)) {
+ // Assumes that the first operand of the CALLr is the function address.
+ if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
MachineOperand MO = MI->getOperand(0);
- if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
+ if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
return true;
- }
}
return false;
}
-static bool IsRegDependence(const SDep::Kind DepType) {
- return (DepType == SDep::Data || DepType == SDep::Anti ||
- DepType == SDep::Output);
+static bool isRegDependence(const SDep::Kind DepType) {
+ return DepType == SDep::Data || DepType == SDep::Anti ||
+ DepType == SDep::Output;
}
-static bool IsDirectJump(MachineInstr* MI) {
- return (MI->getOpcode() == Hexagon::J2_jump);
+static bool isDirectJump(const MachineInstr* MI) {
+ return MI->getOpcode() == Hexagon::J2_jump;
}
-static bool IsSchedBarrier(MachineInstr* MI) {
+static bool isSchedBarrier(const MachineInstr* MI) {
switch (MI->getOpcode()) {
case Hexagon::Y2_barrier:
return true;
@@ -374,76 +304,127 @@ static bool IsSchedBarrier(MachineInstr* MI) {
return false;
}
-static bool IsControlFlow(MachineInstr* MI) {
+static bool isControlFlow(const MachineInstr* MI) {
return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
}
-static bool IsLoopN(MachineInstr *MI) {
- return (MI->getOpcode() == Hexagon::J2_loop0i ||
- MI->getOpcode() == Hexagon::J2_loop0r);
-}
-/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
-/// callee-saved register.
-static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
+/// Returns true if the instruction modifies a callee-saved register.
+static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- for (const MCPhysReg *CSR =
- TRI->getCalleeSavedRegs(MI->getParent()->getParent());
- *CSR; ++CSR) {
- unsigned CalleeSavedReg = *CSR;
- if (MI->modifiesRegister(CalleeSavedReg, TRI))
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ if (MI->modifiesRegister(*CSR, TRI))
return true;
- }
return false;
}
-// Returns true if an instruction can be promoted to .new predicate
-// or new-value store.
-bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- return isCondInst(MI) || QII->mayBeNewStore(MI);
+// TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+// Returns true if an instruction can be promoted to .new predicate or
+// new-value store.
+bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
+ return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
}
-bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- const MCInstrDesc& TID = MI->getDesc();
- // bug 5670: until that is fixed,
- // this portion is disabled.
- if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
- || QII->isConditionalTransfer(MI)
- || QII->isConditionalALU32(MI)
- || QII->isConditionalLoad(MI)
- || QII->isConditionalStore(MI)) {
- return true;
+// Promote an instruction to its .cur form.
+// At this time, we have already made a call to canPromoteToDotCur and made
+// sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+ assert(DepType == SDep::Data);
+ int CurOpcode = HII->getDotCurOp(MI);
+ MI->setDesc(HII->get(CurOpcode));
+ return true;
+}
+
+void HexagonPacketizerList::cleanUpDotCur() {
+ MachineInstr *MI = NULL;
+ for (auto BI : CurrentPacketMIs) {
+ DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
+ if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
+ MI = BI;
+ continue;
+ }
+ if (MI) {
+ for (auto &MO : BI->operands())
+ if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
+ return;
+ }
}
- return false;
+ if (!MI)
+ return;
+ // We did not find a use of the CUR, so de-cur it.
+ MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
+ DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
}
+// Check to see if an instruction can be dot cur.
+bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass *RC) {
+ if (!HII->isV60VectorInstruction(MI))
+ return false;
+ if (!HII->isV60VectorInstruction(MII))
+ return false;
-// Promote an instructiont to its .new form.
-// At this time, we have already made a call to CanPromoteToDotNew
-// and made sure that it can *indeed* be promoted.
-bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
- SDep::Kind DepType, MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC) {
+  // Already a dot cur instruction.
+ if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
+ return false;
- assert (DepType == SDep::Data);
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ if (!HII->mayBeCurLoad(MI))
+ return false;
+
+ // The "cur value" cannot come from inline asm.
+ if (PacketSU->getInstr()->isInlineAsm())
+ return false;
+
+ // Make sure candidate instruction uses cur.
+ DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
+ MI->dump();
+ dbgs() << "in packet\n";);
+ MachineInstr *MJ = MII;
+ DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
+ unsigned DestReg = MI->getOperand(0).getReg();
+ bool FoundMatch = false;
+ for (auto &MO : MJ->operands())
+ if (MO.isReg() && MO.getReg() == DestReg)
+ FoundMatch = true;
+ if (!FoundMatch)
+ return false;
+
+ // Check for existing uses of a vector register within the packet which
+  // would be affected by converting a vector load into .cur form.
+ for (auto BI : CurrentPacketMIs) {
+ DEBUG(dbgs() << "packet has "; BI->dump(););
+ if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
+ // We can convert the opcode into a .cur.
+ return true;
+}
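A hedged sketch of the expected calling pattern (the call site is not part of this hunk): the feasibility check must succeed before the promotion is applied.

    if (canPromoteToDotCur(MI, PacketSU, DepReg, MII, RC))
      promoteToDotCur(MI, SDep::Data, MII, RC);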
+// Promote an instruction to its .new form. At this time, we have already
+// made a call to canPromoteToDotNew and made sure that it can *indeed* be
+// promoted.
+bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+ assert (DepType == SDep::Data);
int NewOpcode;
if (RC == &Hexagon::PredRegsRegClass)
- NewOpcode = QII->GetDotNewPredOp(MI, MBPI);
+ NewOpcode = HII->getDotNewPredOp(MI, MBPI);
else
- NewOpcode = QII->GetDotNewOp(MI);
- MI->setDesc(QII->get(NewOpcode));
-
+ NewOpcode = HII->getDotNewOp(MI);
+ MI->setDesc(HII->get(NewOpcode));
return true;
}
-bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- int NewOpcode = QII->GetDotOldOp(MI->getOpcode());
- MI->setDesc(QII->get(NewOpcode));
+bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
+ int NewOpcode = HII->getDotOldOp(MI->getOpcode());
+ MI->setDesc(HII->get(NewOpcode));
return true;
}
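(Note, stated as an assumption since the callers are outside this hunk: demoteToDotOld is the inverse of promoteToDotNew, reverting a .new opcode back to its .old form when a tentative promotion has to be undone.)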
@@ -455,175 +436,173 @@ enum PredicateKind {
/// Returns true if an instruction is predicated on p0 and false if it's
/// predicated on !p0.
-static PredicateKind getPredicateSense(MachineInstr* MI,
- const HexagonInstrInfo *QII) {
- if (!QII->isPredicated(MI))
+static PredicateKind getPredicateSense(const MachineInstr *MI,
+ const HexagonInstrInfo *HII) {
+ if (!HII->isPredicated(MI))
return PK_Unknown;
-
- if (QII->isPredicatedTrue(MI))
+ if (HII->isPredicatedTrue(MI))
return PK_True;
-
return PK_False;
}
-static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
- const HexagonInstrInfo *QII) {
- assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
+static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
+ const HexagonInstrInfo *HII) {
+ assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
#ifndef NDEBUG
// Post Increment means duplicates. Use dense map to find duplicates in the
// list. Caution: Densemap initializes with the minimum of 64 buckets,
// whereas there are at most 5 operands in the post increment.
- DenseMap<unsigned, unsigned> DefRegsSet;
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
- if (MI->getOperand(opNum).isReg() &&
- MI->getOperand(opNum).isDef()) {
- DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
- }
-
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
- if (MI->getOperand(opNum).isReg() &&
- MI->getOperand(opNum).isUse()) {
- if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
- return MI->getOperand(opNum);
- }
- }
+ DenseSet<unsigned> DefRegsSet;
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && MO.isDef())
+ DefRegsSet.insert(MO.getReg());
+
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
+ return MO;
#else
- if (MI->getDesc().mayLoad()) {
+ if (MI->mayLoad()) {
+ const MachineOperand &Op1 = MI->getOperand(1);
// The 2nd operand is always the post increment operand in load.
- assert(MI->getOperand(1).isReg() &&
- "Post increment operand has be to a register.");
- return (MI->getOperand(1));
+    assert(Op1.isReg() && "Post increment operand has to be a register.");
+ return Op1;
}
if (MI->getDesc().mayStore()) {
+ const MachineOperand &Op0 = MI->getOperand(0);
// The 1st operand is always the post increment operand in store.
- assert(MI->getOperand(0).isReg() &&
- "Post increment operand has be to a register.");
- return (MI->getOperand(0));
+    assert(Op0.isReg() && "Post increment operand has to be a register.");
+ return Op0;
}
#endif
// we should never come here.
llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
}
-// get the value being stored
-static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
+// Get the value being stored.
+static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
// value being stored is always the last operand.
- return (MI->getOperand(MI->getNumOperands()-1));
+ return MI->getOperand(MI->getNumOperands()-1);
+}
+
+static bool isLoadAbsSet(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::L4_loadrd_ap:
+ case Hexagon::L4_loadrb_ap:
+ case Hexagon::L4_loadrh_ap:
+ case Hexagon::L4_loadrub_ap:
+ case Hexagon::L4_loadruh_ap:
+ case Hexagon::L4_loadri_ap:
+ return true;
+ }
+ return false;
}
-// can be new value store?
+static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
+ assert(isLoadAbsSet(MI));
+ return MI->getOperand(1);
+}
+
+
+// Can this be a new-value store?
// Following restrictions are to be respected in convert a store into
// a new value store.
// 1. If an instruction uses auto-increment, its address register cannot
// be a new-value register. Arch Spec 5.4.2.1
-// 2. If an instruction uses absolute-set addressing mode,
-// its address register cannot be a new-value register.
-// Arch Spec 5.4.2.1.TODO: This is not enabled as
-// as absolute-set address mode patters are not implemented.
+// 2. If an instruction uses absolute-set addressing mode, its address
+// register cannot be a new-value register. Arch Spec 5.4.2.1.
// 3. If an instruction produces a 64-bit result, its registers cannot be used
// as new-value registers. Arch Spec 5.4.2.2.
-// 4. If the instruction that sets a new-value register is conditional, then
+// 4. If the instruction that sets the new-value register is conditional, then
// the instruction that uses the new-value register must also be conditional,
// and both must always have their predicates evaluate identically.
// Arch Spec 5.4.2.3.
-// 5. There is an implied restriction of a packet can not have another store,
-// if there is a new value store in the packet. Corollary, if there is
+// 5. There is an implied restriction that a packet cannot have another store,
+// if there is a new value store in the packet. Corollary: if there is
// already a store in a packet, there can not be a new value store.
// Arch Spec: 3.4.4.2
-bool HexagonPacketizerList::CanPromoteToNewValueStore(
- MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
+ const MachineInstr *PacketMI, unsigned DepReg) {
// Make sure we are looking at the store, that can be promoted.
- if (!QII->mayBeNewStore(MI))
+ if (!HII->mayBeNewStore(MI))
return false;
- // Make sure there is dependency and can be new value'ed
- if (GetStoreValueOperand(MI).isReg() &&
- GetStoreValueOperand(MI).getReg() != DepReg)
+  // Make sure there is a dependency, and that it can be new-value'd.
+ const MachineOperand &Val = getStoreValueOperand(MI);
+ if (Val.isReg() && Val.getReg() != DepReg)
return false;
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
const MCInstrDesc& MCID = PacketMI->getDesc();
- // first operand is always the result
-
- const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF);
-
- // if there is already an store in the packet, no can do new value store
- // Arch Spec 3.4.4.2.
- for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
- VE = CurrentPacketMIs.end();
- (VI != VE); ++VI) {
- SUnit *PacketSU = MIToSUnit.find(*VI)->second;
- if (PacketSU->getInstr()->getDesc().mayStore() ||
- // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME,
- // then we don't need this
- PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
- PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe)
- return false;
- }
- if (PacketRC == &Hexagon::DoubleRegsRegClass) {
- // new value store constraint: double regs can not feed into new value store
- // arch spec section: 5.4.2.2
+ // First operand is always the result.
+ const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
+ // Double regs can not feed into new value store: PRM section: 5.4.2.2.
+ if (PacketRC == &Hexagon::DoubleRegsRegClass)
return false;
+
+ // New-value stores are of class NV (slot 0), dual stores require class ST
+ // in slot 0 (PRM 5.5).
+ for (auto I : CurrentPacketMIs) {
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+ if (PacketSU->getInstr()->mayStore())
+ return false;
}
// Make sure it's NOT the post increment register that we are going to
// new value.
- if (QII->isPostIncrement(MI) &&
- MI->getDesc().mayStore() &&
- GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
+ if (HII->isPostIncrement(MI) &&
+ getPostIncrementOperand(MI, HII).getReg() == DepReg) {
return false;
}
- if (QII->isPostIncrement(PacketMI) &&
- PacketMI->getDesc().mayLoad() &&
- GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
- // if source is post_inc, or absolute-set addressing,
- // it can not feed into new value store
- // r3 = memw(r2++#4)
- // memw(r30 + #-1404) = r2.new -> can not be new value store
- // arch spec section: 5.4.2.1
+ if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
+ getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
+ // If source is post_inc, or absolute-set addressing, it can not feed
+ // into new value store
+ // r3 = memw(r2++#4)
+ // memw(r30 + #-1404) = r2.new -> can not be new value store
+ // arch spec section: 5.4.2.1.
return false;
}
+ if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
+ return false;
+
// If the source that feeds the store is predicated, new value store must
// also be predicated.
- if (QII->isPredicated(PacketMI)) {
- if (!QII->isPredicated(MI))
+ if (HII->isPredicated(PacketMI)) {
+ if (!HII->isPredicated(MI))
return false;
// Check to make sure that they both will have their predicates
- // evaluate identically
+ // evaluate identically.
unsigned predRegNumSrc = 0;
unsigned predRegNumDst = 0;
const TargetRegisterClass* predRegClass = nullptr;
- // Get predicate register used in the source instruction
- for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
- if ( PacketMI->getOperand(opNum).isReg())
- predRegNumSrc = PacketMI->getOperand(opNum).getReg();
- predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
- if (predRegClass == &Hexagon::PredRegsRegClass) {
+ // Get predicate register used in the source instruction.
+ for (auto &MO : PacketMI->operands()) {
+ if (!MO.isReg())
+ continue;
+ predRegNumSrc = MO.getReg();
+ predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
+ if (predRegClass == &Hexagon::PredRegsRegClass)
break;
- }
}
- assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
- ("predicate register not found in a predicated PacketMI instruction"));
-
- // Get predicate register used in new-value store instruction
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
- if ( MI->getOperand(opNum).isReg())
- predRegNumDst = MI->getOperand(opNum).getReg();
- predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
- if (predRegClass == &Hexagon::PredRegsRegClass) {
+ assert((predRegClass == &Hexagon::PredRegsRegClass) &&
+ "predicate register not found in a predicated PacketMI instruction");
+
+ // Get predicate register used in new-value store instruction.
+ for (auto &MO : MI->operands()) {
+ if (!MO.isReg())
+ continue;
+ predRegNumDst = MO.getReg();
+ predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
+ if (predRegClass == &Hexagon::PredRegsRegClass)
break;
- }
}
- assert ((predRegClass == &Hexagon::PredRegsRegClass ) &&
- ("predicate register not found in a predicated MI instruction"));
+ assert((predRegClass == &Hexagon::PredRegsRegClass) &&
+ "predicate register not found in a predicated MI instruction");
// New-value register producer and user (store) need to satisfy these
// constraints:
@@ -632,13 +611,11 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore(
// should also be .new predicated and if producer is not .new predicated
// then store should not be .new predicated.
// 3) Both new-value register producer and user should have same predicate
- // sense, i.e, either both should be negated or both should be none negated.
-
- if (( predRegNumDst != predRegNumSrc) ||
- QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) ||
- getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) {
+  // sense, i.e., either both should be negated or both should be non-negated.
+ if (predRegNumDst != predRegNumSrc ||
+ HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
+ getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
return false;
- }
}
// Make sure that other than the new-value register no other store instruction
@@ -649,81 +626,77 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore(
// including PacketMI. However, we need to perform the check for the
// remaining instructions in the packet.
- std::vector<MachineInstr*>::iterator VI;
- std::vector<MachineInstr*>::iterator VE;
unsigned StartCheck = 0;
- for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
- (VI != VE); ++VI) {
- SUnit *TempSU = MIToSUnit.find(*VI)->second;
+ for (auto I : CurrentPacketMIs) {
+ SUnit *TempSU = MIToSUnit.find(I)->second;
MachineInstr* TempMI = TempSU->getInstr();
// Following condition is true for all the instructions until PacketMI is
// reached (StartCheck is set to 0 before the for loop).
// StartCheck flag is 1 for all the instructions after PacketMI.
- if (TempMI != PacketMI && !StartCheck) // start processing only after
- continue; // encountering PacketMI
+ if (TempMI != PacketMI && !StartCheck) // Start processing only after
+ continue; // encountering PacketMI.
StartCheck = 1;
- if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+ if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
continue;
- for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
- if (MI->getOperand(opNum).isReg() &&
- TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
- QRI))
+ for (auto &MO : MI->operands())
+ if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
return false;
- }
}
// Make sure that for non-POST_INC stores:
// 1. The only use of reg is DepReg and no other registers.
// This handles V4 base+index registers.
// The following store can not be dot new.
- // Eg. r0 = add(r0, #3)a
+ // Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
- if (!QII->isPostIncrement(MI) &&
- GetStoreValueOperand(MI).isReg() &&
- GetStoreValueOperand(MI).getReg() == DepReg) {
- for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
- if (MI->getOperand(opNum).isReg() &&
- MI->getOperand(opNum).getReg() == DepReg) {
- return false;
- }
- }
- // 2. If data definition is because of implicit definition of the register,
- // do not newify the store. Eg.
- // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
- // STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
- for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
- if (PacketMI->getOperand(opNum).isReg() &&
- PacketMI->getOperand(opNum).getReg() == DepReg &&
- PacketMI->getOperand(opNum).isDef() &&
- PacketMI->getOperand(opNum).isImplicit()) {
+ if (!HII->isPostIncrement(MI)) {
+ for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.isReg() && MO.getReg() == DepReg)
return false;
- }
}
}
+ // If data definition is because of implicit definition of the register,
+ // do not newify the store. Eg.
+ // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+ // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+ for (auto &MO : PacketMI->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
+ continue;
+ unsigned R = MO.getReg();
+ if (R == DepReg || HRI->isSuperRegister(DepReg, R))
+ return false;
+ }
+
+  // Handle the imp-use of a super-reg case. A change on the target-independent
+  // side should prevent this situation, but handle it here just in case.
+  // For example, we cannot newify R2 in the following case:
+ // %R3<def> = A2_tfrsi 0;
+ // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
+ for (auto &MO : MI->operands()) {
+ if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
+ return false;
+ }
+
// Can be dot new store.
return true;
}
-// can this MI to promoted to either
-// new value store or new value jump
-bool HexagonPacketizerList::CanPromoteToNewValue(
- MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
- if (!QII->mayBeNewStore(MI))
+// Can this MI to promoted to either new value store or new value jump.
+bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg,
+ MachineBasicBlock::iterator &MII) {
+ if (!HII->mayBeNewStore(MI))
return false;
- MachineInstr *PacketMI = PacketSU->getInstr();
-
// Check to see the store can be new value'ed.
- if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+ MachineInstr *PacketMI = PacketSU->getInstr();
+ if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
return true;
// Check to see the compare/jump can be new value'ed.
@@ -731,93 +704,110 @@ bool HexagonPacketizerList::CanPromoteToNewValue(
return false;
}
+static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
+ for (auto &MO : I->operands())
+ if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
+ return true;
+ return false;
+}
+
// Check to see if an instruction can be dot new
// There are three kinds.
// 1. dot new on predicate - V2/V3/V4
// 2. dot new on stores NV/ST - V4
// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
-bool HexagonPacketizerList::CanPromoteToDotNew(
- MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit,
- MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) {
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
+ const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
// Already a dot new instruction.
- if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
+ if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
return false;
if (!isNewifiable(MI))
return false;
+ const MachineInstr *PI = PacketSU->getInstr();
+
+ // The "new value" cannot come from inline asm.
+ if (PI->isInlineAsm())
+ return false;
+
+ // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
+ // sense.
+ if (PI->isImplicitDef())
+ return false;
+
+  // If the dependency is through an implicitly defined register, we should not
+ // newify the use.
+ if (isImplicitDependency(PI, DepReg))
+ return false;
+
+ const MCInstrDesc& MCID = PI->getDesc();
+ const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
+ if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
+ return false;
+
// predicate .new
- if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
- return true;
- else if (RC != &Hexagon::PredRegsRegClass &&
- !QII->mayBeNewStore(MI)) // MI is not a new-value store
+ // bug 5670: until that is fixed
+ // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+ if (RC == &Hexagon::PredRegsRegClass)
+ if (HII->isCondInst(MI) || MI->isReturn())
+ return HII->predCanBeUsedAsDotNew(PI, DepReg);
+
+ if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
+ return false;
+
+ // Create a dot new machine instruction to see if resources can be
+ // allocated. If not, bail out now.
+ int NewOpcode = HII->getDotNewOp(MI);
+ const MCInstrDesc &D = HII->get(NewOpcode);
+ MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
+ bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+ MF.DeleteMachineInstr(NewMI);
+ if (!ResourcesAvailable)
+ return false;
+
+ // New Value Store only. New Value Jump generated as a separate pass.
+ if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
return false;
- else {
- // Create a dot new machine instruction to see if resources can be
- // allocated. If not, bail out now.
- int NewOpcode = QII->GetDotNewOp(MI);
- const MCInstrDesc &desc = QII->get(NewOpcode);
- DebugLoc dl;
- MachineInstr *NewMI =
- MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
- bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
- MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
-
- if (!ResourcesAvailable)
- return false;
- // new value store only
- // new new value jump generated as a passes
- if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
- return false;
- }
- }
return true;
}
-// Go through the packet instructions and search for anti dependency
-// between them and DepReg from MI
-// Consider this case:
+// Go through the packet instructions and search for an anti dependency between
+// them and DepReg from MI. Consider this case:
// Trying to add
// a) %R1<def> = TFRI_cdNotPt %P3, 2
// to this packet:
// {
-// b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
-// c) %P3<def> = TFR_PdRs %R23
-// d) %R1<def> = TFRI_cdnPt %P3, 4
+// b) %P0<def> = C2_or %P3<kill>, %P0<kill>
+// c) %P3<def> = C2_tfrrp %R23
+// d) %R1<def> = C2_cmovenewit %P3, 4
// }
// The P3 from a) and d) will be complements after
// a)'s P3 is converted to .new form
-// Anti Dep between c) and b) is irrelevant for this case
-bool HexagonPacketizerList::RestrictingDepExistInPacket(
- MachineInstr *MI, unsigned DepReg,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+// Anti-dep between c) and b) is irrelevant for this case
+bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
+ unsigned DepReg) {
SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
- for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
- VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
-
+ for (auto I : CurrentPacketMIs) {
// We only care for dependencies to predicated instructions
- if(!QII->isPredicated(*VIN)) continue;
+ if (!HII->isPredicated(I))
+ continue;
// Scheduling Unit for current insn in the packet
- SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
- // Look at dependencies between current members of the packet
- // and predicate defining instruction MI.
- // Make sure that dependency is on the exact register
- // we care about.
+ // Look at dependencies between current members of the packet and
+ // predicate defining instruction MI. Make sure that dependency is
+ // on the exact register we care about.
if (PacketSU->isSucc(PacketSUDep)) {
for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
- if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
- (PacketSU->Succs[i].getKind() == SDep::Anti) &&
- (PacketSU->Succs[i].getReg() == DepReg)) {
+ auto &Dep = PacketSU->Succs[i];
+ if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
+ Dep.getReg() == DepReg)
return true;
- }
}
}
}
@@ -831,276 +821,362 @@ static unsigned getPredicatedRegister(MachineInstr *MI,
const HexagonInstrInfo *QII) {
/// We use the following rule: The first predicate register that is a use is
/// the predicate register of a predicated instruction.
-
assert(QII->isPredicated(MI) && "Must be predicated instruction");
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- MachineOperand &Op = *OI;
+ for (auto &Op : MI->operands()) {
if (Op.isReg() && Op.getReg() && Op.isUse() &&
Hexagon::PredRegsRegClass.contains(Op.getReg()))
return Op.getReg();
}
llvm_unreachable("Unknown instruction operand layout");
-
return 0;
}
// Given two predicated instructions, this function detects whether
-// the predicates are complements
-bool HexagonPacketizerList::ArePredicatesComplements(
- MachineInstr *MI1, MachineInstr *MI2,
- const std::map<MachineInstr *, SUnit *> &MIToSUnit) {
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-
+// the predicates are complements.
+bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1,
+ MachineInstr *MI2) {
// If we don't know the predicate sense of the instructions bail out early, we
// need it later.
- if (getPredicateSense(MI1, QII) == PK_Unknown ||
- getPredicateSense(MI2, QII) == PK_Unknown)
+ if (getPredicateSense(MI1, HII) == PK_Unknown ||
+ getPredicateSense(MI2, HII) == PK_Unknown)
return false;
- // Scheduling unit for candidate
- SUnit *SU = MIToSUnit.find(MI1)->second;
+ // Scheduling unit for candidate.
+ SUnit *SU = MIToSUnit[MI1];
// One corner case deals with the following scenario:
// Trying to add
- // a) %R24<def> = TFR_cPt %P0, %R25
+ // a) %R24<def> = A2_tfrt %P0, %R25
// to this packet:
- //
// {
- // b) %R25<def> = TFR_cNotPt %P0, %R24
- // c) %P0<def> = CMPEQri %R26, 1
+ // b) %R25<def> = A2_tfrf %P0, %R24
+ // c) %P0<def> = C2_cmpeqi %R26, 1
// }
//
- // On general check a) and b) are complements, but
- // presence of c) will convert a) to .new form, and
- // then it is not a complement
- // We attempt to detect it by analyzing existing
- // dependencies in the packet
+ // On general check a) and b) are complements, but presence of c) will
+ // convert a) to .new form, and then it is not a complement.
+ // We attempt to detect it by analyzing existing dependencies in the packet.
// Analyze relationships between all existing members of the packet.
- // Look for Anti dependecy on the same predicate reg
- // as used in the candidate
- for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
- VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
-
- // Scheduling Unit for current insn in the packet
- SUnit *PacketSU = MIToSUnit.find(*VIN)->second;
+  // Look for an anti dependency on the same predicate reg as used in the
+ // candidate.
+ for (auto I : CurrentPacketMIs) {
+ // Scheduling Unit for current insn in the packet.
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
// If this instruction in the packet is succeeded by the candidate...
if (PacketSU->isSucc(SU)) {
for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
- // The corner case exist when there is true data
- // dependency between candidate and one of current
- // packet members, this dep is on predicate reg, and
- // there already exist anti dep on the same pred in
+ auto Dep = PacketSU->Succs[i];
+        // The corner case exists when there is a true data dependency between
+ // candidate and one of current packet members, this dep is on
+ // predicate reg, and there already exist anti dep on the same pred in
// the packet.
- if (PacketSU->Succs[i].getSUnit() == SU &&
- PacketSU->Succs[i].getKind() == SDep::Data &&
- Hexagon::PredRegsRegClass.contains(
- PacketSU->Succs[i].getReg()) &&
- // Here I know that *VIN is predicate setting instruction
- // with true data dep to candidate on the register
- // we care about - c) in the above example.
- // Now I need to see if there is an anti dependency
- // from c) to any other instruction in the
- // same packet on the pred reg of interest
- RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
- MIToSUnit)) {
- return false;
+ if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
+ Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
+            // Here I know that I is a predicate-setting instruction with true
+ // data dep to candidate on the register we care about - c) in the
+ // above example. Now I need to see if there is an anti dependency
+ // from c) to any other instruction in the same packet on the pred
+ // reg of interest.
+ if (restrictingDepExistInPacket(I, Dep.getReg()))
+ return false;
}
}
}
}
- // If the above case does not apply, check regular
- // complement condition.
- // Check that the predicate register is the same and
- // that the predicate sense is different
- // We also need to differentiate .old vs. .new:
- // !p0 is not complimentary to p0.new
- unsigned PReg1 = getPredicatedRegister(MI1, QII);
- unsigned PReg2 = getPredicatedRegister(MI2, QII);
- return ((PReg1 == PReg2) &&
- Hexagon::PredRegsRegClass.contains(PReg1) &&
- Hexagon::PredRegsRegClass.contains(PReg2) &&
- (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) &&
- (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2)));
+ // If the above case does not apply, check regular complement condition.
+ // Check that the predicate register is the same and that the predicate
+  // sense is different. We also need to differentiate .old vs. .new: !p0
+ // is not complementary to p0.new.
+ unsigned PReg1 = getPredicatedRegister(MI1, HII);
+ unsigned PReg2 = getPredicatedRegister(MI2, HII);
+ return PReg1 == PReg2 &&
+ Hexagon::PredRegsRegClass.contains(PReg1) &&
+ Hexagon::PredRegsRegClass.contains(PReg2) &&
+ getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
+ HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
}
-// initPacketizerState - Initialize packetizer flags
+// Initialize packetizer flags.
void HexagonPacketizerList::initPacketizerState() {
-
Dependence = false;
PromotedToDotNew = false;
GlueToNewValueJump = false;
GlueAllocframeStore = false;
FoundSequentialDependence = false;
-
- return;
}
-// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
-bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
- MachineBasicBlock *MBB) {
+// Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock*) {
if (MI->isDebugValue())
return true;
if (MI->isCFIInstruction())
return false;
- // We must print out inline assembly
+ // We must print out inline assembly.
if (MI->isInlineAsm())
return false;
- // We check if MI has any functional units mapped to it.
- // If it doesn't, we ignore the instruction.
+ if (MI->isImplicitDef())
+ return false;
+
+ // We check if MI has any functional units mapped to it. If it doesn't,
+ // we ignore the instruction.
const MCInstrDesc& TID = MI->getDesc();
- unsigned SchedClass = TID.getSchedClass();
- const InstrStage* IS =
- ResourceTracker->getInstrItins()->beginStage(SchedClass);
+ auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
unsigned FuncUnits = IS->getUnits();
return !FuncUnits;
}
-// isSoloInstruction: - Returns true for instructions that must be
-// scheduled in their own packet.
-bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
+bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) {
if (MI->isEHLabel() || MI->isCFIInstruction())
return true;
- if (MI->isInlineAsm())
+ // Consider inline asm to not be a solo instruction by default.
+ // Inline asm will be put in a packet temporarily, but then it will be
+ // removed, and placed outside of the packet (before or after, depending
+ // on dependencies). This is to reduce the impact of inline asm as a
+ // "packet splitting" instruction.
+ if (MI->isInlineAsm() && !ScheduleInlineAsm)
return true;
// From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
// trap, pause, barrier, icinva, isync, and syncht are solo instructions.
// They must not be grouped with other instructions in a packet.
- if (IsSchedBarrier(MI))
+ if (isSchedBarrier(MI))
+ return true;
+
+ if (HII->isSolo(MI))
+ return true;
+
+ if (MI->getOpcode() == Hexagon::A2_nop)
return true;
return false;
}
-// isLegalToPacketizeTogether:
-// SUI is the current instruction that is out side of the current packet.
-// SUJ is the current instruction inside the current packet against which that
-// SUI will be packetized.
-bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
- MachineInstr *I = SUI->getInstr();
- MachineInstr *J = SUJ->getInstr();
- assert(I && J && "Unable to packetize null instruction!");
- const MCInstrDesc &MCIDI = I->getDesc();
- const MCInstrDesc &MCIDJ = J->getDesc();
+// Quick check if instructions MI and MJ cannot coexist in the same packet.
+// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm",
+// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
+// For a full test, call this function twice:
+//   cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
+// Doing the test only one way reduces the amount of code in this function,
+// since every test would otherwise need to be repeated with MI and MJ reversed.
+static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
+ const HexagonInstrInfo &HII) {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
+ HII.isHVXMemWithAIndirect(MI, MJ))
+ return true;
- MachineBasicBlock::iterator II = I;
+ // An inline asm cannot be together with a branch, because we may not be
+  // able to pull the asm out after packetizing (i.e. if the asm must be
+ // moved past the bundle). Similarly, two asms cannot be together to avoid
+ // complications when determining their relative order outside of a bundle.
+ if (MI->isInlineAsm())
+ return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
+ MJ->isCall() || MJ->isTerminator();
- const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
- const HexagonRegisterInfo *QRI =
- (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo();
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // "False" really means that the quick check failed to determine if
+ // I and J cannot coexist.
+ return false;
+}
- // Inline asm cannot go in the packet.
- if (I->getOpcode() == Hexagon::INLINEASM)
- llvm_unreachable("Should not meet inline asm here!");
- if (isSoloInstruction(I))
- llvm_unreachable("Should not meet solo instr here!");
+// Full, symmetric check.
+bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
+ const MachineInstr *MJ) {
+ return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
+}
- // A save callee-save register function call can only be in a packet
- // with instructions that don't write to the callee-save registers.
- if ((QII->isSaveCalleeSavedRegsCall(I) &&
- DoesModifyCalleeSavedReg(J, QRI)) ||
- (QII->isSaveCalleeSavedRegsCall(J) &&
- DoesModifyCalleeSavedReg(I, QRI))) {
- Dependence = true;
- return false;
+void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
+ for (auto &B : MF) {
+ MachineBasicBlock::iterator BundleIt;
+ MachineBasicBlock::instr_iterator NextI;
+ for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
+ NextI = std::next(I);
+ MachineInstr *MI = &*I;
+ if (MI->isBundle())
+ BundleIt = I;
+ if (!MI->isInsideBundle())
+ continue;
+
+ // Decide on where to insert the instruction that we are pulling out.
+ // Debug instructions always go before the bundle, but the placement of
+ // INLINE_ASM depends on potential dependencies. By default, try to
+ // put it before the bundle, but if the asm writes to a register that
+ // other instructions in the bundle read, then we need to place it
+ // after the bundle (to preserve the bundle semantics).
+ bool InsertBeforeBundle;
+ if (MI->isInlineAsm())
+ InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI);
+ else if (MI->isDebugValue())
+ InsertBeforeBundle = true;
+ else
+ continue;
+
+ BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
+ }
}
+}
- // Two control flow instructions cannot go in the same packet.
- if (IsControlFlow(I) && IsControlFlow(J)) {
- Dependence = true;
- return false;
+// Check if a given instruction is of class "system".
+static bool isSystemInstr(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::Y2_barrier:
+ case Hexagon::Y2_dcfetchbo:
+ return true;
}
+ return false;
+}
- // A LoopN instruction cannot appear in the same packet as a jump or call.
- if (IsLoopN(I) &&
- (IsDirectJump(J) || MCIDJ.isCall() || QII->isDeallocRet(J))) {
- Dependence = true;
+bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ // The dependence graph may not include edges between dead definitions,
+  // so without extra checks, we could end up packetizing two instructions
+ // defining the same (dead) register.
+ if (I->isCall() || J->isCall())
return false;
- }
- if (IsLoopN(J) &&
- (IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) {
- Dependence = true;
+ if (HII->isPredicated(I) || HII->isPredicated(J))
return false;
+
+ BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
+ for (auto &MO : I->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isDead())
+ continue;
+ DeadDefs[MO.getReg()] = true;
}
+ for (auto &MO : J->operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.isDead())
+ continue;
+ unsigned R = MO.getReg();
+ if (R != Hexagon::USR_OVF && DeadDefs[R])
+ return true;
+ }
+ return false;
+}
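An illustrative, hypothetical example of the hazard this check guards against (using the operand notation already used in comments in this file): because the dependence graph omits edges between dead definitions, two instructions such as

    %R2<def,dead> = A2_tfrsi 0
    %R2<def,dead> = A2_tfrsi 1

could otherwise be placed in the same packet. USR_OVF is exempted, presumably because multiple instructions may legitimately update the sticky overflow flag within one packet.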
+
+bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ // A save callee-save register function call can only be in a packet
+ // with instructions that don't write to the callee-save registers.
+ if ((HII->isSaveCalleeSavedRegsCall(I) &&
+ doesModifyCalleeSavedReg(J, HRI)) ||
+ (HII->isSaveCalleeSavedRegsCall(J) &&
+ doesModifyCalleeSavedReg(I, HRI)))
+ return true;
+
+ // Two control flow instructions cannot go in the same packet.
+ if (isControlFlow(I) && isControlFlow(J))
+ return true;
+
+ // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
+ // contain a speculative indirect jump,
+ // a new-value compare jump or a dealloc_return.
+ auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
+ if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
+ return true;
+ if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
+ return true;
+ return false;
+ };
+
+ if (HII->isLoopN(I) && isBadForLoopN(J))
+ return true;
+ if (HII->isLoopN(J) && isBadForLoopN(I))
+ return true;
+
// dealloc_return cannot appear in the same packet as a conditional or
// unconditional jump.
- if (QII->isDeallocRet(I) &&
- (MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) {
- Dependence = true;
- return false;
+ return HII->isDeallocRet(I) &&
+ (J->isBranch() || J->isCall() || J->isBarrier());
+}
+
+bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
+ const MachineInstr *J) {
+ bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
+ bool StoreI = I->mayStore(), StoreJ = J->mayStore();
+ if ((SysI && StoreJ) || (SysJ && StoreI))
+ return true;
+
+ if (StoreI && StoreJ) {
+ if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
+ return true;
+ } else {
+ // A memop cannot be in the same packet with another memop or a store.
+ // Two stores can be together, but here I and J cannot both be stores.
+ bool MopStI = HII->isMemOp(I) || StoreI;
+ bool MopStJ = HII->isMemOp(J) || StoreJ;
+ if (MopStI && MopStJ)
+ return true;
}
+ return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
+}
- // V4 allows dual store. But does not allow second store, if the
- // first store is not in SLOT0. New value store, new value jump,
- // dealloc_return and memop always take SLOT0.
- // Arch spec 3.4.4.2
- if (MCIDI.mayStore() && MCIDJ.mayStore() &&
- (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) {
- Dependence = true;
+// SUI is the current instruction that is outside of the current packet.
+// SUJ is the current instruction inside the current packet against which that
+// SUI will be packetized.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ // Clear IgnoreDepMIs when Packet starts.
+ if (CurrentPacketMIs.size() == 1)
+ IgnoreDepMIs.clear();
+
+ MachineBasicBlock::iterator II = I;
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+
+ // Solo instructions cannot go in the packet.
+ assert(!isSoloInstruction(I) && "Unexpected solo instr!");
+
+ if (cannotCoexist(I, J))
return false;
- }
- if ((QII->isMemOp(J) && MCIDI.mayStore())
- || (MCIDJ.mayStore() && QII->isMemOp(I))
- || (QII->isMemOp(J) && QII->isMemOp(I))) {
- Dependence = true;
+ Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
+ if (Dependence)
return false;
- }
- //if dealloc_return
- if (MCIDJ.mayStore() && QII->isDeallocRet(I)) {
- Dependence = true;
+  // V4 allows dual stores. It does not allow a second store if the first
+ // store is not in SLOT0. New value store, new value jump, dealloc_return
+ // and memop always take SLOT0. Arch spec 3.4.4.2.
+ Dependence = hasV4SpecificDependence(I, J);
+ if (Dependence)
return false;
- }
// If an instruction feeds new value jump, glue it.
MachineBasicBlock::iterator NextMII = I;
++NextMII;
- if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+ if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) {
MachineInstr *NextMI = NextMII;
bool secondRegMatch = false;
- bool maintainNewValueJump = false;
+ const MachineOperand &NOp0 = NextMI->getOperand(0);
+ const MachineOperand &NOp1 = NextMI->getOperand(1);
- if (NextMI->getOperand(1).isReg() &&
- I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
secondRegMatch = true;
- maintainNewValueJump = true;
- }
-
- if (!secondRegMatch &&
- I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
- maintainNewValueJump = true;
- }
- for (std::vector<MachineInstr*>::iterator
- VI = CurrentPacketMIs.begin(),
- VE = CurrentPacketMIs.end();
- (VI != VE && maintainNewValueJump); ++VI) {
- SUnit *PacketSU = MIToSUnit.find(*VI)->second;
-
- // NVJ can not be part of the dual jump - Arch Spec: section 7.8
- if (PacketSU->getInstr()->getDesc().isCall()) {
+ for (auto I : CurrentPacketMIs) {
+ SUnit *PacketSU = MIToSUnit.find(I)->second;
+ MachineInstr *PI = PacketSU->getInstr();
+ // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
+ if (PI->isCall()) {
Dependence = true;
break;
}
- // Validate
+ // Validate:
// 1. Packet does not have a store in it.
// 2. If the first operand of the nvj is newified, and the second
// operand is also a reg, it (second reg) is not defined in
@@ -1108,302 +1184,413 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// 3. If the second operand of the nvj is newified, (which means
// first operand is also a reg), first reg is not defined in
// the same packet.
- if (PacketSU->getInstr()->getDesc().mayStore() ||
- PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
- // Check #2.
- (!secondRegMatch && NextMI->getOperand(1).isReg() &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(1).getReg(), QRI)) ||
- // Check #3.
- (secondRegMatch &&
- PacketSU->getInstr()->modifiesRegister(
- NextMI->getOperand(0).getReg(), QRI))) {
+ if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+ HII->isLoopN(PI)) {
+ Dependence = true;
+ break;
+ }
+ // Check #2/#3.
+ const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
+ if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
Dependence = true;
break;
}
}
- if (!Dependence)
- GlueToNewValueJump = true;
- else
+
+ if (Dependence)
return false;
+ GlueToNewValueJump = true;
}
- if (SUJ->isSucc(SUI)) {
- for (unsigned i = 0;
- (i < SUJ->Succs.size()) && !FoundSequentialDependence;
- ++i) {
+ // There is no dependency between a prolog instruction and its successor.
+ if (!SUJ->isSucc(SUI))
+ return true;
- if (SUJ->Succs[i].getSUnit() != SUI) {
- continue;
- }
+ for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
+ if (FoundSequentialDependence)
+ break;
- SDep::Kind DepType = SUJ->Succs[i].getKind();
+ if (SUJ->Succs[i].getSUnit() != SUI)
+ continue;
- // For direct calls:
- // Ignore register dependences for call instructions for
- // packetization purposes except for those due to r31 and
- // predicate registers.
- //
- // For indirect calls:
- // Same as direct calls + check for true dependences to the register
- // used in the indirect call.
- //
- // We completely ignore Order dependences for call instructions
- //
- // For returns:
- // Ignore register dependences for return instructions like jumpr,
- // dealloc return unless we have dependencies on the explicit uses
- // of the registers used by jumpr (like r31) or dealloc return
- // (like r29 or r30).
- //
- // TODO: Currently, jumpr is handling only return of r31. So, the
- // following logic (specificaly IsCallDependent) is working fine.
- // We need to enable jumpr for register other than r31 and then,
- // we need to rework the last part, where it handles indirect call
- // of that (IsCallDependent) function. Bug 6216 is opened for this.
- //
- unsigned DepReg = 0;
- const TargetRegisterClass* RC = nullptr;
- if (DepType == SDep::Data) {
- DepReg = SUJ->Succs[i].getReg();
- RC = QRI->getMinimalPhysRegClass(DepReg);
- }
- if ((MCIDI.isCall() || MCIDI.isReturn()) &&
- (!IsRegDependence(DepType) ||
- !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
- /* do nothing */
- }
+ SDep::Kind DepType = SUJ->Succs[i].getKind();
+ // For direct calls:
+ // Ignore register dependences for call instructions for packetization
+ // purposes except for those due to r31 and predicate registers.
+ //
+ // For indirect calls:
+ // Same as direct calls + check for true dependences to the register
+ // used in the indirect call.
+ //
+ // We completely ignore Order dependences for call instructions.
+ //
+ // For returns:
+ // Ignore register dependences for return instructions like jumpr,
+ // dealloc return unless we have dependencies on the explicit uses
+ // of the registers used by jumpr (like r31) or dealloc return
+ // (like r29 or r30).
+ //
+ // TODO: Currently, jumpr handles only returns of r31, so the following
+ // logic (specifically isCallDependent) works fine. Once jumpr is enabled
+ // for registers other than r31, we need to rework the last part, where
+ // it handles the indirect call of that (isCallDependent) function.
+ // Bug 6216 is opened for this.
+ unsigned DepReg = 0;
+ const TargetRegisterClass *RC = nullptr;
+ if (DepType == SDep::Data) {
+ DepReg = SUJ->Succs[i].getReg();
+ RC = HRI->getMinimalPhysRegClass(DepReg);
+ }
- // For instructions that can be promoted to dot-new, try to promote.
- else if ((DepType == SDep::Data) &&
- CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
- PromoteToDotNew(I, DepType, II, RC)) {
- PromotedToDotNew = true;
- /* do nothing */
- }
+ if (I->isCall() || I->isReturn()) {
+ if (!isRegDependence(DepType))
+ continue;
+ if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
+ continue;
+ }
- else if ((DepType == SDep::Data) &&
- (QII->isNewValueJump(I))) {
- /* do nothing */
- }
+ if (DepType == SDep::Data) {
+ if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
+ if (promoteToDotCur(J, DepType, II, RC))
+ continue;
+ }
- // For predicated instructions, if the predicates are complements
- // then there can be no dependence.
- else if (QII->isPredicated(I) &&
- QII->isPredicated(J) &&
- ArePredicatesComplements(I, J, MIToSUnit)) {
- /* do nothing */
+ // A data dependence is OK if we have a load.cur.
+ if (DepType == SDep::Data && HII->isDotCurInst(J)) {
+ if (HII->isV60VectorInstruction(I))
+ continue;
+ }
+ // For instructions that can be promoted to dot-new, try to promote.
+ if (DepType == SDep::Data) {
+ if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
+ if (promoteToDotNew(I, DepType, II, RC)) {
+ PromotedToDotNew = true;
+ continue;
+ }
}
- else if (IsDirectJump(I) &&
- !MCIDJ.isBranch() &&
- !MCIDJ.isCall() &&
- (DepType == SDep::Order)) {
- // Ignore Order dependences between unconditional direct branches
- // and non-control-flow instructions
- /* do nothing */
- }
- else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
- (DepType != SDep::Output)) {
- // Ignore all dependences for jumps except for true and output
- // dependences
- /* do nothing */
- }
-
- // Ignore output dependences due to superregs. We can
- // write to two different subregisters of R1:0 for instance
- // in the same cycle
- //
+ if (HII->isNewValueJump(I))
+ continue;
+ }
+ // For predicated instructions, if the predicates are complements then
+ // there can be no dependence.
+ if (HII->isPredicated(I) && HII->isPredicated(J) &&
+ arePredicatesComplements(I, J)) {
+ // Not always safe to do this translation.
+ // DAG Builder attempts to reduce dependence edges using the transitive
+ // nature of dependencies. Here is an example:
//
- // Let the
- // If neither I nor J defines DepReg, then this is a
- // superfluous output dependence. The dependence must be of the
- // form:
- // R0 = ...
- // R1 = ...
- // and there is an output dependence between the two instructions
- // with
- // DepReg = D0
- // We want to ignore these dependences.
- // Ideally, the dependence constructor should annotate such
- // dependences. We can then avoid this relatively expensive check.
+ // r0 = tfr_pt ... (1)
+ // r0 = tfr_pf ... (2)
+ // r0 = tfr_pt ... (3)
//
- else if (DepType == SDep::Output) {
- // DepReg is the register that's responsible for the dependence.
- unsigned DepReg = SUJ->Succs[i].getReg();
+ // There will be an output dependence between (1)->(2) and (2)->(3).
+ // However, there is no dependence edge between (1)->(3). This results
+ // in all 3 instructions going in the same packet. We ignore the
+ // dependence only once to avoid this situation.
+ auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
+ if (Itr != IgnoreDepMIs.end()) {
+ Dependence = true;
+ return false;
+ }
+ IgnoreDepMIs.push_back(I);
+ continue;
+ }
+
+ // Ignore Order dependences between unconditional direct branches
+ // and non-control-flow instructions.
+ if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
+ DepType == SDep::Order)
+ continue;
+
+ // Ignore all dependences for jumps except for true and output
+ // dependences.
+ if (I->isConditionalBranch() && DepType != SDep::Data &&
+ DepType != SDep::Output)
+ continue;
+
+ // Ignore output dependences due to superregs. We can write to two
+ // different subregisters of R1:0 for instance in the same cycle.
+
+ // If neither I nor J defines DepReg, then this is a superfluous output
+ // dependence. The dependence must be of the form:
+ // R0 = ...
+ // R1 = ...
+ // and there is an output dependence between the two instructions with
+ // DepReg = D0.
+ // We want to ignore these dependences. Ideally, the dependence
+ // constructor should annotate such dependences. We can then avoid this
+ // relatively expensive check.
+ //
+ if (DepType == SDep::Output) {
+ // DepReg is the register that's responsible for the dependence.
+ unsigned DepReg = SUJ->Succs[i].getReg();
+
+ // Check if I and J really defines DepReg.
+ if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
+ continue;
+ FoundSequentialDependence = true;
+ break;
+ }
- // Check if I and J really defines DepReg.
- if (I->definesRegister(DepReg) ||
- J->definesRegister(DepReg)) {
+ // For Order dependences:
+ // 1. On V4 or later, volatile loads/stores can be packetized together,
+ // unless other rules prevent it.
+ // 2. Store followed by a load is not allowed.
+ // 3. Store followed by a store is only valid on V4 or later.
+ // 4. Load followed by any memory operation is allowed.
+ if (DepType == SDep::Order) {
+ if (!PacketizeVolatiles) {
+ bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
+ if (OrdRefs) {
FoundSequentialDependence = true;
break;
}
}
-
- // We ignore Order dependences for
- // 1. Two loads unless they are volatile.
- // 2. Two stores in V4 unless they are volatile.
- else if ((DepType == SDep::Order) &&
- !I->hasOrderedMemoryRef() &&
- !J->hasOrderedMemoryRef()) {
- if (MCIDI.mayStore() && MCIDJ.mayStore()) {
- /* do nothing */
- }
- // store followed by store-- not OK on V2
- // store followed by load -- not OK on all (OK if addresses
- // are not aliased)
- // load followed by store -- OK on all
- // load followed by load -- OK on all
- else if ( !MCIDJ.mayStore()) {
- /* do nothing */
- }
- else {
+ // J is first, I is second.
+ bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
+ bool LoadI = I->mayLoad(), StoreI = I->mayStore();
+ if (StoreJ) {
+ // Two stores are only allowed on V4+. Load following store is never
+ // allowed.
+ if (LoadI) {
FoundSequentialDependence = true;
break;
}
- }
-
- // For V4, special case ALLOCFRAME. Even though there is dependency
- // between ALLOCFRAME and subsequent store, allow it to be
- // packetized in a same packet. This implies that the store is using
- // caller's SP. Hence, offset needs to be updated accordingly.
- else if (DepType == SDep::Data
- && J->getOpcode() == Hexagon::S2_allocframe
- && (I->getOpcode() == Hexagon::S2_storerd_io
- || I->getOpcode() == Hexagon::S2_storeri_io
- || I->getOpcode() == Hexagon::S2_storerb_io)
- && I->getOperand(0).getReg() == QRI->getStackRegister()
- && QII->isValidOffset(I->getOpcode(),
- I->getOperand(1).getImm() -
- (FrameSize + HEXAGON_LRFP_SIZE)))
- {
- GlueAllocframeStore = true;
- // Since this store is to be glued with allocframe in the same
- // packet, it will use SP of the previous stack frame, i.e
- // caller's SP. Therefore, we need to recalculate offset according
- // to this change.
- I->getOperand(1).setImm(I->getOperand(1).getImm() -
- (FrameSize + HEXAGON_LRFP_SIZE));
- }
-
- //
- // Skip over anti-dependences. Two instructions that are
- // anti-dependent can share a packet
- //
- else if (DepType != SDep::Anti) {
+ } else if (!LoadJ || (!LoadI && !StoreI)) {
+ // If J is neither load nor store, assume a dependency.
+ // If J is a load, but I is neither, also assume a dependency.
FoundSequentialDependence = true;
break;
}
+ // Store followed by store: not OK on V2.
+ // Store followed by load: not OK on all.
+ // Load followed by store: OK on all.
+ // Load followed by load: OK on all.
+ continue;
}
- if (FoundSequentialDependence) {
- Dependence = true;
- return false;
+ // For V4, special case ALLOCFRAME. Even though there is dependency
+ // between ALLOCFRAME and subsequent store, allow it to be packetized
+ // in the same packet. This implies that the store is using the caller's
+ // SP. Hence, offset needs to be updated accordingly.
+ if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
+ unsigned Opc = I->getOpcode();
+ switch (Opc) {
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
+ int64_t Imm = I->getOperand(1).getImm();
+ int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
+ if (HII->isValidOffset(Opc, NewOff)) {
+ GlueAllocframeStore = true;
+ // Since this store is to be glued with allocframe in the same
+ // packet, it will use SP of the previous stack frame, i.e.
+ // caller's SP. Therefore, we need to recalculate offset
+ // according to this change.
+ I->getOperand(1).setImm(NewOff);
+ continue;
+ }
+ }
+ default:
+ break;
+ }
+ }
+
+ // Skip over anti-dependences. Two instructions that are anti-dependent
+ // can share a packet.
+ if (DepType != SDep::Anti) {
+ FoundSequentialDependence = true;
+ break;
}
}
+ if (FoundSequentialDependence) {
+ Dependence = true;
+ return false;
+ }
+
return true;
}
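
The Order-dependence handling above reduces to a small decision table (J already in the packet, I the candidate that follows it). Below is a minimal standalone sketch of that table using plain booleans in place of the MachineInstr queries; it illustrates the commented rules and is not code from this patch.

#include <cassert>

// Returns true when the pair must be split into separate packets (V4+ rules).
static bool orderDepForcesSplit(bool LoadJ, bool StoreJ, bool LoadI, bool StoreI) {
  if (StoreJ)
    return LoadI;             // store followed by load is never allowed
  if (!LoadJ)
    return true;              // J is neither load nor store: assume a dependence
  return !LoadI && !StoreI;   // J is a load: split only if I is neither
}

int main() {
  assert(!orderDepForcesSplit(false, true, false, true)); // store, store: OK on V4+
  assert(orderDepForcesSplit(false, true, true, false));  // store, load: split
  assert(!orderDepForcesSplit(true, false, false, true)); // load, store: OK
  assert(!orderDepForcesSplit(true, false, true, false)); // load, load: OK
  return 0;
}
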
-// isLegalToPruneDependencies
bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
MachineInstr *I = SUI->getInstr();
- assert(I && SUJ->getInstr() && "Unable to packetize null instruction!");
-
- const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
-
- if (Dependence) {
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
- // Check if the instruction was promoted to a dot-new. If so, demote it
- // back into a dot-old.
- if (PromotedToDotNew) {
- DemoteToDotOld(I);
- }
+ if (cannotCoexist(I, J))
+ return false;
- // Check if the instruction (must be a store) was glued with an Allocframe
- // instruction. If so, restore its offset to its original value, i.e. use
- // curent SP instead of caller's SP.
- if (GlueAllocframeStore) {
- I->getOperand(1).setImm(I->getOperand(1).getImm() +
- FrameSize + HEXAGON_LRFP_SIZE);
- }
+ if (!Dependence)
+ return true;
- return false;
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew)
+ demoteToDotOld(I);
+
+ cleanUpDotCur();
+ // Check if the instruction (must be a store) was glued with an allocframe
+ // instruction. If so, restore its offset to its original value, i.e. use
+ // current SP instead of caller's SP.
+ if (GlueAllocframeStore) {
+ unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ MachineOperand &MOff = I->getOperand(1);
+ MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
}
- return true;
+ return false;
}
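
The allocframe gluing in isLegalToPacketizeTogether and the restore above are an offset rewrite and its exact inverse. A minimal sketch of that round-trip, assuming HEXAGON_LRFP_SIZE is 8 as in the Hexagon headers (illustration only, not part of the patch):

#include <cassert>
#include <cstdint>

constexpr int64_t LRFPSize = 8;   // assumed value of HEXAGON_LRFP_SIZE

// Rewrite applied when a store is glued with allocframe (store uses caller's SP).
static int64_t glueOffset(int64_t Imm, int64_t FrameSize) {
  return Imm - (FrameSize + LRFPSize);
}

// Inverse rewrite applied when the gluing is abandoned.
static int64_t unglueOffset(int64_t Imm, int64_t FrameSize) {
  return Imm + FrameSize + LRFPSize;
}

int main() {
  const int64_t FrameSize = 32, Orig = 16;
  assert(unglueOffset(glueOffset(Orig, FrameSize), FrameSize) == Orig);
  return 0;
}
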
+
MachineBasicBlock::iterator
HexagonPacketizerList::addToPacket(MachineInstr *MI) {
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (MI->isImplicitDef()) {
+ unsigned R = MI->getOperand(0).getReg();
+ if (Hexagon::IntRegsRegClass.contains(R)) {
+ MCSuperRegIterator S(R, HRI, false);
+ MI->addOperand(MachineOperand::CreateReg(*S, true, true));
+ }
+ return MII;
+ }
+ assert(ResourceTracker->canReserveResources(MI));
+
+ bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
+ bool Good = true;
+
+ if (GlueToNewValueJump) {
+ MachineInstr *NvjMI = ++MII;
+ // We need to put both instructions in the same packet: MI and NvjMI.
+ // Either of them can require a constant extender. Try to add both to
+ // the current packet, and if that fails, end the packet and start a
+ // new one.
+ ResourceTracker->reserveResources(MI);
+ if (ExtMI)
+ Good = tryAllocateResourcesForConstExt(true);
+
+ bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
+ if (Good) {
+ if (ResourceTracker->canReserveResources(NvjMI))
+ ResourceTracker->reserveResources(NvjMI);
+ else
+ Good = false;
+ }
+ if (Good && ExtNvjMI)
+ Good = tryAllocateResourcesForConstExt(true);
- MachineBasicBlock::iterator MII = MI;
- MachineBasicBlock *MBB = MI->getParent();
-
- const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-
- if (GlueToNewValueJump) {
-
- ++MII;
- MachineInstr *nvjMI = MII;
+ if (!Good) {
+ endPacket(MBB, MI);
assert(ResourceTracker->canReserveResources(MI));
ResourceTracker->reserveResources(MI);
- if ((QII->isExtended(MI) || QII->isConstExtended(MI)) &&
- !tryAllocateResourcesForConstExt(MI)) {
- endPacket(MBB, MI);
- ResourceTracker->reserveResources(MI);
- assert(canReserveResourcesForConstExt(MI) &&
- "Ensure that there is a slot");
- reserveResourcesForConstExt(MI);
- // Reserve resources for new value jump constant extender.
- assert(canReserveResourcesForConstExt(MI) &&
- "Ensure that there is a slot");
- reserveResourcesForConstExt(nvjMI);
- assert(ResourceTracker->canReserveResources(nvjMI) &&
- "Ensure that there is a slot");
-
- } else if ( // Extended instruction takes two slots in the packet.
- // Try reserve and allocate 4-byte in the current packet first.
- (QII->isExtended(nvjMI)
- && (!tryAllocateResourcesForConstExt(nvjMI)
- || !ResourceTracker->canReserveResources(nvjMI)))
- || // For non-extended instruction, no need to allocate extra 4 bytes.
- (!QII->isExtended(nvjMI) &&
- !ResourceTracker->canReserveResources(nvjMI)))
- {
- endPacket(MBB, MI);
- // A new and empty packet starts.
- // We are sure that the resources requirements can be satisfied.
- // Therefore, do not need to call "canReserveResources" anymore.
- ResourceTracker->reserveResources(MI);
- if (QII->isExtended(nvjMI))
- reserveResourcesForConstExt(nvjMI);
+ if (ExtMI) {
+ assert(canReserveResourcesForConstExt());
+ tryAllocateResourcesForConstExt(true);
}
- // Here, we are sure that "reserveResources" would succeed.
- ResourceTracker->reserveResources(nvjMI);
- CurrentPacketMIs.push_back(MI);
- CurrentPacketMIs.push_back(nvjMI);
- } else {
- if ( (QII->isExtended(MI) || QII->isConstExtended(MI))
- && ( !tryAllocateResourcesForConstExt(MI)
- || !ResourceTracker->canReserveResources(MI)))
- {
- endPacket(MBB, MI);
- // Check if the instruction was promoted to a dot-new. If so, demote it
- // back into a dot-old
- if (PromotedToDotNew) {
- DemoteToDotOld(MI);
- }
- reserveResourcesForConstExt(MI);
+ assert(ResourceTracker->canReserveResources(NvjMI));
+ ResourceTracker->reserveResources(NvjMI);
+ if (ExtNvjMI) {
+ assert(canReserveResourcesForConstExt());
+ reserveResourcesForConstExt();
}
- // In case that "MI" is not an extended insn,
- // the resource availability has already been checked.
- ResourceTracker->reserveResources(MI);
- CurrentPacketMIs.push_back(MI);
}
+ CurrentPacketMIs.push_back(MI);
+ CurrentPacketMIs.push_back(NvjMI);
return MII;
+ }
+
+ ResourceTracker->reserveResources(MI);
+ if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
+ endPacket(MBB, MI);
+ if (PromotedToDotNew)
+ demoteToDotOld(MI);
+ ResourceTracker->reserveResources(MI);
+ reserveResourcesForConstExt();
+ }
+
+ CurrentPacketMIs.push_back(MI);
+ return MII;
+}
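
addToPacket follows a try-then-retry pattern: reserve resources in the current packet, and if a required constant extender does not fit, end the packet and reserve again in a fresh one. The toy model below mimics that flow with a four-slot packet standing in for the DFA-based ResourceTracker; the slot counts are assumptions for illustration, not the real resource model.

#include <cassert>

struct ToyPacket {
  int SlotsLeft = 4;
  bool reserve(int N) {
    if (SlotsLeft < N)
      return false;
    SlotsLeft -= N;
    return true;
  }
  void clear() { SlotsLeft = 4; }   // stands in for endPacket()
};

static void addToToyPacket(ToyPacket &P, bool NeedsExtender) {
  int Need = NeedsExtender ? 2 : 1;   // an extender occupies an extra slot
  if (!P.reserve(Need)) {
    P.clear();                        // end the packet and start a fresh one
    bool Ok = P.reserve(Need);        // a fresh packet always has room here
    assert(Ok);
    (void)Ok;
  }
}

int main() {
  ToyPacket P;
  for (int i = 0; i != 6; ++i)
    addToToyPacket(P, /*NeedsExtender=*/i == 3);
  return 0;
}
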
+
+void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ OldPacketMIs = CurrentPacketMIs;
+ VLIWPacketizerList::endPacket(MBB, MI);
+}
+
+bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) {
+ return !producesStall(MI);
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
+static bool isDependent(const MachineInstr *ProdMI,
+ const MachineInstr *ConsMI) {
+ if (!ProdMI->getOperand(0).isReg())
+ return false;
+ unsigned DstReg = ProdMI->getOperand(0).getReg();
+
+ for (auto &Op : ConsMI->operands())
+ if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
+ // The MIs depend on each other.
+ return true;
+
+ return false;
}
+// V60 forward scheduling.
+bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
+ // Check whether the previous packet is in a different loop. If this is the
+ // case, there is little point in trying to avoid a stall because that would
+ // favor the rare case (loop entry) over the common case (loop iteration).
+ //
+ // TODO: We should really be able to check all the incoming edges if this is
+ // the first packet in a basic block, so we can avoid stalls from the loop
+ // backedge.
+ if (!OldPacketMIs.empty()) {
+ auto *OldBB = OldPacketMIs.front()->getParent();
+ auto *ThisBB = I->getParent();
+ if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
+ return false;
+ }
+
+ // Check for stall between two vector instructions.
+ if (HII->isV60VectorInstruction(I)) {
+ for (auto J : OldPacketMIs) {
+ if (!HII->isV60VectorInstruction(J))
+ continue;
+ if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
+ return true;
+ }
+ return false;
+ }
+
+ // Check for stall between two scalar instructions. First, check that
+ // there is no definition of a use in the current packet, because it
+ // may be a candidate for .new.
+ for (auto J : CurrentPacketMIs)
+ if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
+ return false;
+
+ // Check for stall between I and instructions in the previous packet.
+ if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
+ for (auto J : OldPacketMIs) {
+ if (HII->isV60VectorInstruction(J))
+ continue;
+ if (!HII->isLateInstrFeedsEarlyInstr(J, I))
+ continue;
+ if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
+ return true;
+ }
+ }
+
+ return false;
+}
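
The early exit in producesStall compares the loop of the previous packet with the loop of the candidate instruction: a stall is only worth avoiding when both are in the same loop, since loop entry is the rare case. A tiny model of that decision, with integer ids standing in for MachineLoop pointers (assumption for illustration only):

#include <cassert>

static bool worthCheckingForStall(bool HavePrevPacket, int PrevPacketLoopId,
                                  int CurLoopId) {
  if (!HavePrevPacket)
    return true;                         // nothing to compare against yet
  return PrevPacketLoopId == CurLoopId;  // same loop: the common case
}

int main() {
  assert(worthCheckingForStall(true, 1, 1));   // loop iteration: check for stalls
  assert(!worthCheckingForStall(true, 1, 2));  // loop entry: skip the check
  assert(worthCheckingForStall(false, 0, 0));  // no previous packet yet
  return 0;
}
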
+
+
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
new file mode 100644
index 0000000..960cf6c
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -0,0 +1,114 @@
+#ifndef HEXAGONVLIWPACKETIZER_H
+#define HEXAGONVLIWPACKETIZER_H
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+namespace llvm {
+class HexagonPacketizerList : public VLIWPacketizerList {
+ // Vector of instructions assigned to the packet that has just been created.
+ std::vector<MachineInstr*> OldPacketMIs;
+
+ // Has the instruction been promoted to a dot-new instruction.
+ bool PromotedToDotNew;
+
+ // Has the instruction been glued to allocframe.
+ bool GlueAllocframeStore;
+
+ // Has the feeder instruction been glued to new value jump.
+ bool GlueToNewValueJump;
+
+ // Check if there is a dependence between some instruction already in this
+ // packet and this instruction.
+ bool Dependence;
+
+ // Only check for dependence if there are resources available to
+ // schedule this instruction.
+ bool FoundSequentialDependence;
+
+ // Track MIs with ignored dependence.
+ std::vector<MachineInstr*> IgnoreDepMIs;
+
+protected:
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+ const MachineLoopInfo *MLI;
+
+private:
+ const HexagonInstrInfo *HII;
+ const HexagonRegisterInfo *HRI;
+
+public:
+ // Ctor.
+ HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ AliasAnalysis *AA,
+ const MachineBranchProbabilityInfo *MBPI);
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState() override;
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(const MachineInstr *MI,
+ const MachineBasicBlock *MBB) override;
+
+ // isSoloInstruction - Return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(const MachineInstr *MI) override;
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
+
+ // isLegalToPruneDependencies - Is it legal to prune a dependence between SUI
+ // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override;
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override;
+ void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override;
+ bool shouldAddToPacket(const MachineInstr *MI) override;
+
+ void unpacketizeSoloInstrs(MachineFunction &MF);
+
+protected:
+ bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType,
+ unsigned DepReg);
+ bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ void cleanUpDotCur();
+
+ bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU,
+ unsigned DepReg, MachineBasicBlock::iterator &MII);
+ bool canPromoteToNewValueStore(const MachineInstr* MI,
+ const MachineInstr* PacketMI, unsigned DepReg);
+ bool demoteToDotOld(MachineInstr* MI);
+ bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2);
+ bool restrictingDepExistInPacket(MachineInstr*, unsigned);
+ bool isNewifiable(const MachineInstr *MI);
+ bool isCurifiable(MachineInstr* MI);
+ bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ);
+ inline bool isPromotedToDotNew() const {
+ return PromotedToDotNew;
+ }
+ bool tryAllocateResourcesForConstExt(bool Reserve);
+ bool canReserveResourcesForConstExt();
+ void reserveResourcesForConstExt();
+ bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J);
+ bool hasControlDependence(const MachineInstr *I, const MachineInstr *J);
+ bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J);
+ bool producesStall(const MachineInstr *MI);
+};
+} // namespace llvm
+#endif // HEXAGONVLIWPACKETIZER_H
+
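
For orientation, the hooks declared above are consulted by the generic VLIW packetizer roughly in this order: solo/pseudo filtering, then the legality checks against every instruction already in the packet, then addToPacket/endPacket. The sketch below is a deliberately simplified driver with stand-in types; the real driver is VLIWPacketizerList::PacketizeMIs and differs in many details, including resource tracking.

#include <vector>

struct Instr {};   // stand-in for MachineInstr

struct ToyPacketizer {
  std::vector<Instr *> CurrentPacket;

  // Stand-ins for the overridable hooks.
  bool isSoloInstruction(Instr *) { return false; }
  bool isLegalToPacketizeTogether(Instr *, Instr *) { return true; }
  bool isLegalToPruneDependencies(Instr *, Instr *) { return true; }
  void addToPacket(Instr *I) { CurrentPacket.push_back(I); }
  void endPacket() { CurrentPacket.clear(); }

  void run(std::vector<Instr *> &MIs) {
    for (Instr *I : MIs) {
      if (isSoloInstruction(I)) {   // a solo instruction is a packet by itself
        endPacket();
        addToPacket(I);
        endPacket();
        continue;
      }
      bool Fits = true;
      for (Instr *J : CurrentPacket)
        if (!isLegalToPacketizeTogether(I, J) &&
            !isLegalToPruneDependencies(I, J)) {
          Fits = false;
          break;
        }
      if (!Fits)
        endPacket();
      addToPacket(I);
    }
    endPacket();
  }
};

int main() {
  Instr A, B, C;
  std::vector<Instr *> MIs = {&A, &B, &C};
  ToyPacketizer P;
  P.run(MIs);
  return 0;
}
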
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 99ea2fa..b73af82 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -13,7 +13,9 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -33,14 +35,28 @@ class HexagonAsmBackend : public MCAsmBackend {
mutable uint64_t relaxedCnt;
std::unique_ptr <MCInstrInfo> MCII;
std::unique_ptr <MCInst *> RelaxTarget;
+ MCInst * Extender;
public:
HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) :
- OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *){}
+ OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *),
+ Extender(nullptr) {}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createHexagonELFObjectWriter(OS, OSABI, CPU);
}
+ void setExtender(MCContext &Context) const {
+ if (Extender == nullptr)
+ const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst;
+ }
+
+ MCInst *takeExtender() const {
+ assert(Extender != nullptr);
+ MCInst * Result = Extender;
+ const_cast<HexagonAsmBackend *>(this)->Extender = nullptr;
+ return Result;
+ }
+
unsigned getNumFixupKinds() const override {
return Hexagon::NumTargetFixupKinds;
}
@@ -222,6 +238,7 @@ public:
if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
++relaxedCnt;
*RelaxTarget = &MCI;
+ setExtender(Layout.getAssembler().getContext());
return true;
} else {
return false;
@@ -262,6 +279,7 @@ public:
if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
++relaxedCnt;
*RelaxTarget = &MCI;
+ setExtender(Layout.getAssembler().getContext());
return true;
}
}
@@ -276,9 +294,35 @@ public:
llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
}
- void relaxInstruction(MCInst const & /*Inst*/,
- MCInst & /*Res*/) const override {
- llvm_unreachable("relaxInstruction() unimplemented");
+ void relaxInstruction(MCInst const & Inst,
+ MCInst & Res) const override {
+ assert(HexagonMCInstrInfo::isBundle(Inst) &&
+ "Hexagon relaxInstruction only works on bundles");
+
+ Res = HexagonMCInstrInfo::createBundle();
+ // Copy the results into the bundle.
+ bool Update = false;
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+ MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst());
+
+ // If an immediate extender is needed, add it in.
+ if (*RelaxTarget == &CrntHMI) {
+ Update = true;
+ assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) &&
+ "No room to insert extender for relaxation");
+
+ MCInst *HMIx = takeExtender();
+ *HMIx = HexagonMCInstrInfo::deriveExtender(
+ *MCII, CrntHMI,
+ HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI));
+ Res.addOperand(MCOperand::createInst(HMIx));
+ *RelaxTarget = nullptr;
+ }
+ // Now copy over the original instruction (the one we may have extended).
+ Res.addOperand(MCOperand::createInst(I.getInst()));
+ }
+ (void)Update;
+ assert(Update && "Didn't find relaxation target");
}
bool writeNopData(uint64_t Count,
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index f4d162c..47a6f86 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -44,6 +44,25 @@ namespace HexagonII {
TypeMEMOP = 9,
TypeNV = 10,
TypeDUPLEX = 11,
+ TypeCOMPOUND = 12,
+ TypeCVI_FIRST = 13,
+ TypeCVI_VA = TypeCVI_FIRST,
+ TypeCVI_VA_DV = 14,
+ TypeCVI_VX = 15,
+ TypeCVI_VX_DV = 16,
+ TypeCVI_VP = 17,
+ TypeCVI_VP_VS = 18,
+ TypeCVI_VS = 19,
+ TypeCVI_VINLANESAT= 20,
+ TypeCVI_VM_LD = 21,
+ TypeCVI_VM_TMP_LD = 22,
+ TypeCVI_VM_CUR_LD = 23,
+ TypeCVI_VM_VP_LDU = 24,
+ TypeCVI_VM_ST = 25,
+ TypeCVI_VM_NEW_ST = 26,
+ TypeCVI_VM_STU = 27,
+ TypeCVI_HIST = 28,
+ TypeCVI_LAST = TypeCVI_HIST,
TypePREFIX = 30, // Such as extenders.
TypeENDLOOP = 31 // Such as end of a HW loop.
};
@@ -71,12 +90,16 @@ namespace HexagonII {
PostInc = 6 // Post increment addressing mode
};
+ // MemAccessSize is represented as 1+log2(N), where N is the access size in bytes.
enum class MemAccessSize {
NoMemAccess = 0, // Not a memory acces instruction.
ByteAccess = 1, // Byte access instruction (memb).
HalfWordAccess = 2, // Half word access instruction (memh).
WordAccess = 3, // Word access instruction (memw).
- DoubleWordAccess = 4 // Double word access instruction (memd)
+ DoubleWordAccess = 4, // Double word access instruction (memd)
+ // 5, // We do not have a 16 byte vector access.
+ Vector64Access = 7, // 64 Byte vector access instruction (vmem).
+ Vector128Access = 8 // 128 Byte vector access instruction (vmem).
};
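
Given the 1 + log2(N bytes) encoding above, decoding an access size is just a shift. A minimal sketch of that decoding (not an in-tree helper):

#include <cassert>

static unsigned accessSizeInBytes(unsigned Code) {
  return Code == 0 ? 0 : (1u << (Code - 1));
}

int main() {
  assert(accessSizeInBytes(1) == 1);    // ByteAccess
  assert(accessSizeInBytes(4) == 8);    // DoubleWordAccess
  assert(accessSizeInBytes(7) == 64);   // Vector64Access
  assert(accessSizeInBytes(8) == 128);  // Vector128Access
  return 0;
}
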
// MCInstrDesc TSFlags
@@ -156,7 +179,7 @@ namespace HexagonII {
AddrModeMask = 0x7,
// Access size for load/store instructions.
MemAccessSizePos = 43,
- MemAccesSizeMask = 0x7,
+ MemAccesSizeMask = 0xf,
// Branch predicted taken.
TakenPos = 47,
@@ -164,7 +187,23 @@ namespace HexagonII {
// Floating-point instructions.
FPPos = 48,
- FPMask = 0x1
+ FPMask = 0x1,
+
+ // New-Value producer-2 instructions.
+ hasNewValuePos2 = 50,
+ hasNewValueMask2 = 0x1,
+
+ // Which operand consumes or produces a new value.
+ NewValueOpPos2 = 51,
+ NewValueOpMask2 = 0x7,
+
+ // Accumulator instructions.
+ AccumulatorPos = 54,
+ AccumulatorMask = 0x1,
+
+ // Complex XU: prevent XU competition by preferring slot 3.
+ PrefersSlot3Pos = 55,
+ PrefersSlot3Mask = 0x1,
};
// *** The code above must match HexagonInstrFormat*.td *** //
@@ -219,6 +258,26 @@ namespace HexagonII {
INST_PARSE_EXTENDER = 0x00000000
};
+ enum InstIClassBits : unsigned {
+ INST_ICLASS_MASK = 0xf0000000,
+ INST_ICLASS_EXTENDER = 0x00000000,
+ INST_ICLASS_J_1 = 0x10000000,
+ INST_ICLASS_J_2 = 0x20000000,
+ INST_ICLASS_LD_ST_1 = 0x30000000,
+ INST_ICLASS_LD_ST_2 = 0x40000000,
+ INST_ICLASS_J_3 = 0x50000000,
+ INST_ICLASS_CR = 0x60000000,
+ INST_ICLASS_ALU32_1 = 0x70000000,
+ INST_ICLASS_XTYPE_1 = 0x80000000,
+ INST_ICLASS_LD = 0x90000000,
+ INST_ICLASS_ST = 0xa0000000,
+ INST_ICLASS_ALU32_2 = 0xb0000000,
+ INST_ICLASS_XTYPE_2 = 0xc0000000,
+ INST_ICLASS_XTYPE_3 = 0xd0000000,
+ INST_ICLASS_XTYPE_4 = 0xe0000000,
+ INST_ICLASS_ALU32_3 = 0xf0000000
+ };
+
} // End namespace HexagonII.
} // End namespace llvm.
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 36f8146..06ccec5 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -12,13 +12,13 @@
//===----------------------------------------------------------------------===//
#include "HexagonAsmPrinter.h"
-#include "Hexagon.h"
#include "HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -28,104 +28,33 @@ using namespace llvm;
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
-HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter)
- : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {}
-
-void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
- StringRef Annot,
- MCSubtargetInfo const &STI) {
- assert(HexagonMCInstrInfo::isBundle(*MI));
- assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
- std::string Buffer;
- {
- raw_string_ostream TempStream(Buffer);
- RawPrinter->printInst(MI, TempStream, "", STI);
- }
- StringRef Contents(Buffer);
- auto PacketBundle = Contents.rsplit('\n');
- auto HeadTail = PacketBundle.first.split('\n');
- auto Preamble = "\t{\n\t\t";
- auto Separator = "";
- while(!HeadTail.first.empty()) {
- O << Separator;
- StringRef Inst;
- auto Duplex = HeadTail.first.split('\v');
- if(!Duplex.second.empty()){
- O << Duplex.first << "\n";
- Inst = Duplex.second;
- }
- else
- Inst = Duplex.first;
- O << Preamble;
- O << Inst;
- HeadTail = HeadTail.second.split('\n');
- Preamble = "";
- Separator = "\n\t\t";
- }
- O << "\n\t}" << PacketBundle.second;
-}
-
-void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
- RawPrinter->printRegName(O, RegNo);
-}
-
-// Return the minimum value that a constant extendable operand can have
-// without being extended.
-static int getMinValue(uint64_t TSFlags) {
- unsigned isSigned =
- (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits =
- (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned)
- return -1U << (bits - 1);
-
- return 0;
-}
-
-// Return the maximum value that a constant extendable operand can have
-// without being extended.
-static int getMaxValue(uint64_t TSFlags) {
- unsigned isSigned =
- (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits =
- (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
-
- if (isSigned)
- return ~(-1U << (bits - 1));
-
- return ~(-1U << bits);
-}
-
-// Return true if the instruction must be extended.
-static bool isExtended(uint64_t TSFlags) {
- return (TSFlags >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
-}
-
-// Currently just used in an assert statement
-static bool isExtendable(uint64_t TSFlags) LLVM_ATTRIBUTE_UNUSED;
-// Return true if the instruction may be extended based on the operand value.
-static bool isExtendable(uint64_t TSFlags) {
- return (TSFlags >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask;
+HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI,
+ MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI)
+ : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) {
}
StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
return MII.getName(Opcode);
}
-void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
+ O << getRegName(RegNo);
+}
+
+StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
}
void HexagonInstPrinter::setExtender(MCInst const &MCI) {
HasExtender = HexagonMCInstrInfo::isImmext(MCI);
}
-void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS,
- StringRef Annot,
- MCSubtargetInfo const &STI) {
+void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
+ StringRef Annot, const MCSubtargetInfo &STI) {
assert(HexagonMCInstrInfo::isBundle(*MI));
assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
+ assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
HasExtender = false;
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
MCInst const &MCI = *I.getInst();
@@ -157,145 +86,148 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS,
}
}
-void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
- const MCOperand& MO = MI->getOperand(OpNo);
-
+ if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo &&
+ (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)))
+ O << "#";
+ MCOperand const &MO = MI->getOperand(OpNo);
if (MO.isReg()) {
- printRegName(O, MO.getReg());
- } else if(MO.isExpr()) {
- MO.getExpr()->print(O, &MAI);
- } else if(MO.isImm()) {
- printImmOperand(MI, OpNo, O);
- } else {
- llvm_unreachable("Unknown operand");
- }
-}
-
-void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand& MO = MI->getOperand(OpNo);
-
- if(MO.isExpr()) {
- MO.getExpr()->print(O, &MAI);
- } else if(MO.isImm()) {
- O << MI->getOperand(OpNo).getImm();
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isExpr()) {
+ int64_t Value;
+ if (MO.getExpr()->evaluateAsAbsolute(Value))
+ O << formatImm(Value);
+ else
+ O << *MO.getExpr();
} else {
llvm_unreachable("Unknown operand");
}
}
-void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
- const MCOperand &MO = MI->getOperand(OpNo);
- const MCInstrDesc &MII = getMII().get(MI->getOpcode());
-
- assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
- "Expecting an extendable operand");
-
- if (MO.isExpr() || isExtended(MII.TSFlags)) {
- O << "#";
- } else if (MO.isImm()) {
- int ImmValue = MO.getImm();
- if (ImmValue < getMinValue(MII.TSFlags) ||
- ImmValue > getMaxValue(MII.TSFlags))
- O << "#";
- }
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI,
- unsigned OpNo, raw_ostream &O) const {
+void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI,
+ unsigned OpNo,
+ raw_ostream &O) const {
O << MI->getOperand(OpNo).getImm();
}
-void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
O << -MI->getOperand(OpNo).getImm();
}
-void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
O << -1;
}
-void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand& MO0 = MI->getOperand(OpNo);
- const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<9>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
+ O << formatImm(Imm/64);
+}
- printRegName(O, MO0.getReg());
- O << " + #" << MO1.getImm();
+void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<10>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
+ O << formatImm(Imm/128);
}
-void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand& MO0 = MI->getOperand(OpNo);
- const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<10>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
+ O << formatImm(Imm/64);
+}
- printRegName(O, MO0.getReg());
- O << ", #" << MO1.getImm();
+void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ int64_t Imm;
+ bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
+ Imm = SignExtend64<11>(Imm);
+ assert(Success); (void)Success;
+ assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
+ O << formatImm(Imm/128);
}
-void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
- assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
-
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
printOperand(MI, OpNo, O);
}
-void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
// Branches can take an immediate operand. This is used by the branch
// selection pass to print $+8, an eight byte displacement from the PC.
llvm_unreachable("Unknown branch operand.");
}
-void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
-}
+void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
-void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
-}
+void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
-void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
-}
+void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {}
-void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
+void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo,
raw_ostream &O, bool hi) const {
- assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand");
+ MCOperand const &MO = MI->getOperand(OpNo);
- O << '#' << (hi ? "HI" : "LO") << "(#";
- printOperand(MI, OpNo, O);
+ O << '#' << (hi ? "HI" : "LO") << '(';
+ if (MO.isImm()) {
+ O << '#';
+ printOperand(MI, OpNo, O);
+ } else {
+ printOperand(MI, OpNo, O);
+ assert("Unknown symbol operand");
+ }
O << ')';
}
-void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const {
- const MCOperand &MO = MI->getOperand(OpNo);
- const MCInstrDesc &MII = getMII().get(MI->getOpcode());
-
- assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) &&
- "Expecting an extendable operand");
-
- if (MO.isExpr() || isExtended(MII.TSFlags)) {
- O << "##";
+void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ MCOperand const &MO = MI->getOperand(OpNo);
+ assert (MO.isExpr());
+ MCExpr const &Expr = *MO.getExpr();
+ int64_t Value;
+ if (Expr.evaluateAsAbsolute(Value))
+ O << format("0x%" PRIx64, Value);
+ else {
+ if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))
+ if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo)
+ O << "##";
+ O << Expr;
}
- printOperand(MI, OpNo, O);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 534ac23..5f42118 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
//
-// This class prints an Hexagon MCInst to a .s file.
//
//===----------------------------------------------------------------------===//
@@ -15,17 +14,8 @@
#define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrInfo.h"
namespace llvm {
-class HexagonAsmInstPrinter : public MCInstPrinter {
-public:
- HexagonAsmInstPrinter(MCInstPrinter *RawPrinter);
- void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
- MCSubtargetInfo const &STI) override;
- void printRegName(raw_ostream &O, unsigned RegNo) const override;
- std::unique_ptr<MCInstPrinter> RawPrinter;
-};
/// Prints bundles as a newline separated list of individual instructions
/// Duplexes are separated by a vertical tab \v character
/// A trailing line includes bundle properties such as endloop0/1
@@ -33,68 +23,69 @@ public:
/// r0 = add(r1, r2)
/// r0 = #0 \v jump 0x0
/// :endloop0 :endloop1
- class HexagonInstPrinter : public MCInstPrinter {
- public:
- explicit HexagonInstPrinter(MCAsmInfo const &MAI,
- MCInstrInfo const &MII,
- MCRegisterInfo const &MRI)
- : MCInstPrinter(MAI, MII, MRI), MII(MII) {}
-
- void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
- const MCSubtargetInfo &STI) override;
- virtual StringRef getOpcodeName(unsigned Opcode) const;
- void printInstruction(const MCInst *MI, raw_ostream &O);
- void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- static const char *getRegisterName(unsigned RegNo);
+class HexagonInstPrinter : public MCInstPrinter {
+public:
+ explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII,
+ MCRegisterInfo const &MRI);
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ void printInstruction(MCInst const *MI, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) const;
- void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
- const;
- void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
- void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ StringRef getRegName(unsigned RegNo) const;
+ static char const *getRegisterName(unsigned RegNo);
+ void printRegName(raw_ostream &O, unsigned RegNo) const override;
- void printConstantPool(const MCInst *MI, unsigned OpNo,
+ void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printNegImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const;
+ void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printBranchOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printAbsAddrOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printPredicateOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printGlobalOperand(MCInst const *MI, unsigned OpNo,
+ raw_ostream &O) const;
+ void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+ void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
- void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
- { printSymbol(MI, OpNo, O, true); }
- void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
- { printSymbol(MI, OpNo, O, false); }
+ void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
+ printSymbol(MI, OpNo, O, true);
+ }
+ void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const {
+ printSymbol(MI, OpNo, O, false);
+ }
- const MCInstrInfo &getMII() const {
- return MII;
- }
+ MCAsmInfo const &getMAI() const { return MAI; }
+ MCInstrInfo const &getMII() const { return MII; }
- protected:
- void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
- const;
+protected:
+ void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O,
+ bool hi) const;
- private:
- const MCInstrInfo &MII;
+private:
+ MCInstrInfo const &MII;
- bool HasExtender;
- void setExtender(MCInst const &MCI);
- };
+ bool HasExtender;
+ void setExtender(MCInst const &MCI);
+};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index dc07069..a8456b4 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -18,13 +18,14 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
+class Triple;
- class HexagonMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit HexagonMCAsmInfo(const Triple &TT);
- };
+class HexagonMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit HexagonMCAsmInfo(const Triple &TT);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
new file mode 100644
index 0000000..46b7b41
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -0,0 +1,581 @@
+//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCChecker.h"
+
+#include "HexagonBaseInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false),
+ cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity"));
+
+const HexagonMCChecker::PredSense
+ HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
+
+void HexagonMCChecker::init() {
+ // Initialize read-only registers set.
+ ReadOnly.insert(Hexagon::PC);
+
+ // Figure out the loop-registers definitions.
+ if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
+ Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0?
+ Defs[Hexagon::LC0].insert(Unconditional);
+ }
+ if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
+ Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA1?
+ Defs[Hexagon::LC1].insert(Unconditional);
+ }
+
+ if (HexagonMCInstrInfo::isBundle(MCB))
+ // Unfurl a bundle.
+ for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ init(*I.getInst());
+ }
+ else
+ init(MCB);
+}
+
+void HexagonMCChecker::init(MCInst const& MCI) {
+ const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
+ unsigned PredReg = Hexagon::NoRegister;
+ bool isTrue = false;
+
+ // Get used registers.
+ for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+ if (MCI.getOperand(i).isReg()) {
+ unsigned R = MCI.getOperand(i).getReg();
+
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+ // Note an used predicate register.
+ PredReg = R;
+ isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
+
+ // Note use of new predicate register.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ NewPreds.insert(PredReg);
+ }
+ else
+ // Note register use. Super-registers are not tracked directly,
+ // but their components are.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers used indirectly.
+ Uses.insert(*SRI);
+ }
+
+ // Get implicit register definitions.
+ if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
+ for (; *ImpDef; ++ImpDef) {
+ unsigned R = *ImpDef;
+
+ if (Hexagon::R31 != R && MCID.isCall())
+ // Registers other than the LR and the PC are actually volatile ones
+ // as defined by the ABI and are not modified implicitly by the call insn.
+ continue;
+ if (Hexagon::PC == R)
+ // Branches are the only insns that can change the PC,
+ // otherwise a read-only register.
+ continue;
+
+ if (Hexagon::USR_OVF == R)
+ // Many insns change the USR implicitly, but only one or another flag.
+ // The instruction table models the USR.OVF flag, which can be implicitly
+ // modified more than once, but cannot be modified in the same packet
+ // with an instruction that modifies it explicitly. Deal with such
+ // situations individually.
+ SoftDefs.insert(R);
+ else if (isPredicateRegister(R) &&
+ HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
+ // Include implicit late predicates.
+ LatePreds.insert(R);
+ else
+ Defs[R].insert(PredSense(PredReg, isTrue));
+ }
+
+ // Figure out explicit register definitions.
+ for (unsigned i = 0; i < MCID.getNumDefs(); ++i) {
+ unsigned R = MCI.getOperand(i).getReg(),
+ S = Hexagon::NoRegister;
+
+ // Note register definitions, direct ones as well as indirect side-effects.
+ // Super-registers are not tracked directly, but their components.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI) {
+ if (MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers defined indirectly.
+ continue;
+
+ if (R == *SRI) {
+ if (S == R)
+ // Avoid scoring the defined register multiple times.
+ continue;
+ else
+ // Note that the defined register has already been scored.
+ S = R;
+ }
+
+ if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI)
+ // P3:0 is a special case, since multiple predicate register definitions
+ // in a packet is allowed as the equivalent of their logical "and".
+ // Only an explicit definition of P3:0 is noted as such; if a
+ // side-effect, then note as a soft definition.
+ SoftDefs.insert(*SRI);
+ else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI))
+ // Some insns produce predicates too late to be used in the same packet.
+ LatePreds.insert(*SRI);
+ else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD)
+ // Current loads should be used in the same packet.
+ // TODO: relies on the impossibility of a current and a temporary load
+ // in the same packet.
+ CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue));
+ else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD)
+ // Temporary loads should be used in the same packet, but do not commit
+ // their results, so they should be disregarded if another insn changes
+ // the same register.
+ // TODO: relies on the impossibility of a current and a temporary load
+ // in the same packet.
+ TmpDefs.insert(*SRI);
+ else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) )
+ // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and
+ // destination registers with this instruction. Same for vdeal(Vx, Vy, Rx).
+ Uses.insert(*SRI);
+ else
+ Defs[*SRI].insert(PredSense(PredReg, isTrue));
+ }
+ }
+
+ // Figure out register definitions that produce new values.
+ if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) {
+ unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg();
+
+ if (HexagonMCInstrInfo::isCompound(MCII, MCI))
+ compoundRegisterMap(R); // Compound insns have a limited register range.
+
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // No super-registers defined indirectly.
+ NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+ HexagonMCInstrInfo::isFloat(MCII, MCI)));
+
+ // For the fairly unique producers of two new values, for example
+ // vdeal(V1, V9, R0), both V1.new and V9.new can be used by consumers.
+ if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) {
+ unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg();
+
+ for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI),
+ HexagonMCInstrInfo::isFloat(MCII, MCI)));
+ }
+ }
+
+ // Figure out definitions of new predicate registers.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+ if (MCI.getOperand(i).isReg()) {
+ unsigned P = MCI.getOperand(i).getReg();
+
+ if (isPredicateRegister(P))
+ NewPreds.insert(P);
+ }
+
+ // Figure out uses of new values.
+ if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) {
+ unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg();
+
+ if (!MCSubRegIterator(N, &RI).isValid()) {
+ // Super-registers cannot use new values.
+ if (MCID.isBranch())
+ NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV);
+ else
+ NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
+ }
+ }
+}
+
+HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx,
+ MCRegisterInfo const &ri)
+ : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI),
+ bLoadErrInfo(false) {
+ init();
+}
+
+bool HexagonMCChecker::check() {
+ bool chkB = checkBranches();
+ bool chkP = checkPredicates();
+ bool chkNV = checkNewValues();
+ bool chkR = checkRegisters();
+ bool chkS = checkSolo();
+ bool chkSh = checkShuffle();
+ bool chkSl = checkSlots();
+ bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl;
+
+ return chk;
+}
+
+bool HexagonMCChecker::checkSlots() {
+ unsigned slotsUsed = 0;
+ for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) {
+ MCInst const& MCI = *HMI.getInst();
+ if (HexagonMCInstrInfo::isImmext(MCI))
+ continue;
+ if (HexagonMCInstrInfo::isDuplex(MCII, MCI))
+ slotsUsed += 2;
+ else
+ ++slotsUsed;
+ }
+
+ if (slotsUsed > HEXAGON_PACKET_SIZE) {
+ HexagonMCErrInfo errInfo;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS);
+ addErrInfo(errInfo);
+ return false;
+ }
+ return true;
+}
+
+// Check legal use of branches.
+bool HexagonMCChecker::checkBranches() {
+ HexagonMCErrInfo errInfo;
+ if (HexagonMCInstrInfo::isBundle(MCB)) {
+ bool hasConditional = false;
+ unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0,
+ NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
+ Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE;
+
+ for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset;
+ i < MCB.size(); ++i) {
+ MCInst const &MCI = *MCB.begin()[i].getInst();
+
+ if (HexagonMCInstrInfo::isImmext(MCI))
+ continue;
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() ||
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) {
+ ++Branches;
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() &&
+ HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ ++NewIndirectBranches;
+ if (HexagonMCInstrInfo::isNewValue(MCII, MCI))
+ ++NewValueBranches;
+
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) ||
+ HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) {
+ hasConditional = true;
+ Conditional = i; // Record the position of the conditional branch.
+ } else {
+ Unconditional = i; // Record the position of the unconditional branch.
+ }
+ }
+ if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad())
+ ++Returns;
+ }
+
+ if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
+ if (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+ HexagonMCInstrInfo::isOuterLoop(MCB)) {
+ // Error out if there's any branch in a loop-end packet.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (Branches > 1)
+ if (!hasConditional || Conditional > Unconditional) {
+ // Error out if more than one unconditional branch or
+ // the conditional branch appears after the unconditional one.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check legal use of predicate registers.
+bool HexagonMCChecker::checkPredicates() {
+ HexagonMCErrInfo errInfo;
+ // Check for proper use of new predicate registers.
+ for (const auto& I : NewPreds) {
+ unsigned P = I;
+
+ if (!Defs.count(P) || LatePreds.count(P)) {
+ // Error out if the new predicate register is not defined,
+ // or defined "late"
+ // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ // Check for proper use of auto-anded predicate registers.
+ for (const auto& I : LatePreds) {
+ unsigned P = I;
+
+ if (LatePreds.count(P) > 1 || Defs.count(P)) {
+ // Error out if a predicate register is defined "late" multiple times or
+ // defined late and regularly defined
+ // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check legal use of new values.
+bool HexagonMCChecker::checkNewValues() {
+ HexagonMCErrInfo errInfo;
+ memset(&errInfo, 0, sizeof(errInfo));
+ for (auto& I : NewUses) {
+ unsigned R = I.first;
+ NewSense &US = I.second;
+
+ if (!hasValidNewValueDef(US, NewDefs[R])) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Check for legal register uses and definitions.
+bool HexagonMCChecker::checkRegisters() {
+ HexagonMCErrInfo errInfo;
+ // Check for proper register definitions.
+ for (const auto& I : Defs) {
+ unsigned R = I.first;
+
+ if (ReadOnly.count(R)) {
+ // Error out for definitions of read-only registers.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (isLoopRegister(R) && Defs.count(R) > 1 &&
+ (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+ HexagonMCInstrInfo::isOuterLoop(MCB))) {
+ // Error out for definitions of loop registers at the end of a loop.
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (SoftDefs.count(R)) {
+ // Error out for explicit changes to registers also weakly defined
+ // (e.g., "{ usr = r0; r0 = sfadd(...) }").
+ unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:.
+ unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+ addErrInfo(errInfo);
+ return false;
+ }
+ if (!isPredicateRegister(R) && Defs[R].size() > 1) {
+ // Check for multiple register definitions.
+ PredSet &PM = Defs[R];
+
+ // Check for multiple unconditional register definitions.
+ if (PM.count(Unconditional)) {
+ // Error out on an unconditional change when there are any other
+ // changes, conditional or not.
+ unsigned UsrR = Hexagon::USR;
+ unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+ addErrInfo(errInfo);
+ return false;
+ }
+ // Check for multiple conditional register definitions.
+ for (const auto& J : PM) {
+ PredSense P = J;
+
+ // Check for multiple uses of the same condition.
+ if (PM.count(P) > 1) {
+ // Error out on conditional changes based on the same predicate
+ // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ // Check for the use of the complementary condition.
+ P.second = !P.second;
+ if (PM.count(P) && PM.size() > 2) {
+ // Error out on conditional changes based on the same predicate
+ // multiple times
+ // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =...; if (!p0) r0 =... }").
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ }
+ }
+
+ // Check for use of current definitions.
+ for (const auto& I : CurDefs) {
+ unsigned R = I;
+
+ if (!Uses.count(R)) {
+ // Warn on an unused current definition.
+ errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R);
+ addErrInfo(errInfo);
+ return true;
+ }
+ }
+
+ // Check for use of temporary definitions.
+ for (const auto& I : TmpDefs) {
+ unsigned R = I;
+
+ if (!Uses.count(R)) {
+ // Special case for vhist.
+ bool vHistFound = false;
+ for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ if(llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) {
+ vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp
+ break;
+ }
+ }
+ // Warn on an unused temporary definition.
+ if (!vHistFound) {
+ errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R);
+ addErrInfo(errInfo);
+ return true;
+ }
+ }
+ }
+
+ return true;
+}
+
+// Check for legal use of solo insns.
+bool HexagonMCChecker::checkSolo() {
+ HexagonMCErrInfo errInfo;
+ if (HexagonMCInstrInfo::isBundle(MCB) &&
+ HexagonMCInstrInfo::bundleSize(MCB) > 1) {
+ for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO);
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+bool HexagonMCChecker::checkShuffle() {
+ HexagonMCErrInfo errInfo;
+ // Branch info is lost when duplexing. The unduplexed insns must be
+ // checked, and only branch errors matter in this case.
+ HexagonMCShuffler MCS(MCII, STI, MCB);
+ if (!MCS.check()) {
+ if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+ errInfo.setShuffleError(MCS.getError());
+ addErrInfo(errInfo);
+ return false;
+ }
+ }
+ HexagonMCShuffler MCSDX(MCII, STI, MCBDX);
+ if (!MCSDX.check()) {
+ errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+ errInfo.setShuffleError(MCSDX.getError());
+ addErrInfo(errInfo);
+ return false;
+ }
+ return true;
+}
+
+void HexagonMCChecker::compoundRegisterMap(unsigned& Register) {
+ switch (Register) {
+ default:
+ break;
+ case Hexagon::R15:
+ Register = Hexagon::R23;
+ break;
+ case Hexagon::R14:
+ Register = Hexagon::R22;
+ break;
+ case Hexagon::R13:
+ Register = Hexagon::R21;
+ break;
+ case Hexagon::R12:
+ Register = Hexagon::R20;
+ break;
+ case Hexagon::R11:
+ Register = Hexagon::R19;
+ break;
+ case Hexagon::R10:
+ Register = Hexagon::R18;
+ break;
+ case Hexagon::R9:
+ Register = Hexagon::R17;
+ break;
+ case Hexagon::R8:
+ Register = Hexagon::R16;
+ break;
+ }
+}
+
+bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use,
+ const NewSenseList &Defs) const {
+ bool Strict = !RelaxNVChecks;
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ const NewSense &Def = Defs[i];
+ // NVJ cannot use a new FP value [7.6.1]
+ if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0))
+ continue;
+ // If the definition was not predicated, then it does not matter if
+ // the use is.
+ if (Def.PredReg == 0)
+ return true;
+ // With the strict checks, both the definition and the use must be
+ // predicated on the same register and condition.
+ if (Strict) {
+ if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond)
+ return true;
+ } else {
+ // With the relaxed checks, if the definition was predicated, the only
+ // detectable violation is if the use is predicated on the opposing
+ // condition; otherwise, it's OK.
+ if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond)
+ return true;
+ }
+ }
+ return false;
+}
+
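The per-register PredSet walked by checkRegisters above is a multiset of (predicate register, sense) pairs, and the legality rule is: at most one unconditional write, no repeated condition, and a complementary pair only when nothing else writes the register. A minimal standalone sketch of just that rule, ignoring the soft-definition and predicate-register special cases; writesAreLegal is a hypothetical helper, and (0, false) merely stands in for the checker's Unconditional sentinel, whose exact value is not shown in this patch:

#include <cstdio>
#include <set>
#include <utility>

// (PredReg, sense) pair; (0, false) stands in for an unconditional write.
using PredSense = std::pair<unsigned, bool>;
using PredSet = std::multiset<PredSense>;

// True iff the writes to one register within a packet pass the rule
// enforced by checkRegisters above.
static bool writesAreLegal(const PredSet &PM) {
  if (PM.size() < 2)
    return true;                        // A single write is always fine.
  if (PM.count(PredSense(0, false)))
    return false;                       // Unconditional write plus any other.
  for (PredSense P : PM) {
    if (PM.count(P) > 1)
      return false;                     // Same condition used twice.
    P.second = !P.second;
    if (PM.count(P) && PM.size() > 2)
      return false;                     // Complementary pair plus extra write.
  }
  return true;
}

int main() {
  PredSet A{{1, true}, {1, false}};     // { if (p1) r0=...; if (!p1) r0=... }
  PredSet B{{1, true}, {1, true}};      // { if (p1) r0=...; if (p1) r0=... }
  std::printf("%d %d\n", writesAreLegal(A), writesAreLegal(B)); // prints 1 0
}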
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
new file mode 100644
index 0000000..5fc0bde
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -0,0 +1,218 @@
+//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCHECKER_H
+#define HEXAGONMCCHECKER_H
+
+#include <map>
+#include <set>
+#include <queue>
+#include "MCTargetDesc/HexagonMCShuffler.h"
+
+using namespace llvm;
+
+namespace llvm {
+class MCOperandInfo;
+
+typedef struct {
+ unsigned Error, Warning, ShuffleError;
+ unsigned Register;
+} ErrInfo_T;
+
+class HexagonMCErrInfo {
+public:
+ enum {
+ CHECK_SUCCESS = 0,
+ // Errors.
+ CHECK_ERROR_BRANCHES = 0x00001,
+ CHECK_ERROR_NEWP = 0x00002,
+ CHECK_ERROR_NEWV = 0x00004,
+ CHECK_ERROR_REGISTERS = 0x00008,
+ CHECK_ERROR_READONLY = 0x00010,
+ CHECK_ERROR_LOOP = 0x00020,
+ CHECK_ERROR_ENDLOOP = 0x00040,
+ CHECK_ERROR_SOLO = 0x00080,
+ CHECK_ERROR_SHUFFLE = 0x00100,
+ CHECK_ERROR_NOSLOTS = 0x00200,
+ CHECK_ERROR_UNKNOWN = 0x00400,
+ // Warnings.
+ CHECK_WARN_CURRENT = 0x10000,
+ CHECK_WARN_TEMPORARY = 0x20000
+ };
+ ErrInfo_T s;
+
+ void reset() {
+ s.Error = CHECK_SUCCESS;
+ s.Warning = CHECK_SUCCESS;
+ s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS;
+ s.Register = Hexagon::NoRegister;
+ };
+ HexagonMCErrInfo() {
+ reset();
+ };
+
+ void setError(unsigned e, unsigned r = Hexagon::NoRegister)
+ { s.Error = e; s.Register = r; };
+ void setWarning(unsigned w, unsigned r = Hexagon::NoRegister)
+ { s.Warning = w; s.Register = r; };
+ void setShuffleError(unsigned e) { s.ShuffleError = e; };
+};
+
+/// Check for a valid bundle.
+class HexagonMCChecker {
+ /// Insn bundle.
+ MCInst& MCB;
+ MCInst& MCBDX;
+ const MCRegisterInfo& RI;
+ MCInstrInfo const &MCII;
+ MCSubtargetInfo const &STI;
+ bool bLoadErrInfo;
+
+ /// Set of definitions: register #, if predicated, if predicated true.
+ typedef std::pair<unsigned, bool> PredSense;
+ static const PredSense Unconditional;
+ typedef std::multiset<PredSense> PredSet;
+ typedef std::multiset<PredSense>::iterator PredSetIterator;
+
+ typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator;
+ llvm::DenseMap<unsigned, PredSet> Defs;
+
+ /// Information about how a new-value register is defined or used:
+ /// PredReg = predicate register, 0 if use/def not predicated,
+ /// Cond = true/false for if(PredReg)/if(!PredReg) respectively,
+ /// IsFloat = true if definition produces a floating point value
+ /// (not valid for uses),
+ /// IsNVJ = true if the use is a new-value branch (not valid for
+ /// definitions).
+ struct NewSense {
+ unsigned PredReg;
+ bool IsFloat, IsNVJ, Cond;
+ // The special-case "constructors":
+ static NewSense Jmp(bool isNVJ) {
+ NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ,
+ /*Cond=*/ false };
+ return NS;
+ }
+ static NewSense Use(unsigned PR, bool True) {
+ NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false,
+ /*Cond=*/ True };
+ return NS;
+ }
+ static NewSense Def(unsigned PR, bool True, bool Float) {
+ NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false,
+ /*Cond=*/ True };
+ return NS;
+ }
+ };
+ /// Set of definitions that produce a new register value:
+ typedef llvm::SmallVector<NewSense,2> NewSenseList;
+ typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator;
+ llvm::DenseMap<unsigned, NewSenseList> NewDefs;
+
+ /// Set of weak definitions whose clashes should be enforced selectively.
+ typedef std::set<unsigned>::iterator SoftDefsIterator;
+ std::set<unsigned> SoftDefs;
+
+ /// Set of current definitions committed to the register file.
+ typedef std::set<unsigned>::iterator CurDefsIterator;
+ std::set<unsigned> CurDefs;
+
+ /// Set of temporary definitions not committed to the register file.
+ typedef std::set<unsigned>::iterator TmpDefsIterator;
+ std::set<unsigned> TmpDefs;
+
+ /// Set of new predicates used.
+ typedef std::set<unsigned>::iterator NewPredsIterator;
+ std::set<unsigned> NewPreds;
+
+ /// Set of predicates defined late.
+ typedef std::multiset<unsigned>::iterator LatePredsIterator;
+ std::multiset<unsigned> LatePreds;
+
+ /// Set of uses.
+ typedef std::set<unsigned>::iterator UsesIterator;
+ std::set<unsigned> Uses;
+
+ /// Set of new values used: new register, if new-value jump.
+ typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator;
+ llvm::DenseMap<unsigned, NewSense> NewUses;
+
+ /// Pre-defined set of read-only registers.
+ typedef std::set<unsigned>::iterator ReadOnlyIterator;
+ std::set<unsigned> ReadOnly;
+
+ std::queue<ErrInfo_T> ErrInfoQ;
+ HexagonMCErrInfo CrntErrInfo;
+
+ void getErrInfo() {
+ if (bLoadErrInfo) {
+ if (ErrInfoQ.empty()) {
+ CrntErrInfo.reset();
+ } else {
+ CrntErrInfo.s = ErrInfoQ.front();
+ ErrInfoQ.pop();
+ }
+ }
+ bLoadErrInfo = false;
+ }
+
+ void init();
+ void init(MCInst const&);
+
+ // Checks performed.
+ bool checkBranches();
+ bool checkPredicates();
+ bool checkNewValues();
+ bool checkRegisters();
+ bool checkSolo();
+ bool checkShuffle();
+ bool checkSlots();
+
+ static void compoundRegisterMap(unsigned&);
+
+ bool isPredicateRegister(unsigned R) const {
+ return (Hexagon::P0 == R || Hexagon::P1 == R ||
+ Hexagon::P2 == R || Hexagon::P3 == R);
+ };
+ bool isLoopRegister(unsigned R) const {
+ return (Hexagon::SA0 == R || Hexagon::LC0 == R ||
+ Hexagon::SA1 == R || Hexagon::LC1 == R);
+ };
+
+ bool hasValidNewValueDef(const NewSense &Use,
+ const NewSenseList &Defs) const;
+
+ public:
+ explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx,
+ const MCRegisterInfo& ri);
+
+ bool check();
+
+ /// Add a new error/warning to the queue.
+ void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
+
+ /// Return the error code for the last operation in the insn bundle.
+ unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; };
+ unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; };
+ unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; };
+ unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; };
+ bool getNextErrInfo() {
+ bLoadErrInfo = true;
+ return (ErrInfoQ.empty()) ? false : (getErrInfo(), true);
+ }
+};
+
+}
+
+#endif // HEXAGONMCCHECKER_H
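A rough sketch of how a caller might drive this interface: build the checker over a bundle and its duplexed form, run check(), then drain the queued diagnostics. The function name and the surrounding MC objects (MCII, STI, RI, Bundle, DuplexedBundle) are placeholders assumed to exist; they are not taken from this patch.

#include "MCTargetDesc/HexagonMCChecker.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;

static void reportPacketProblems(MCInstrInfo const &MCII,
                                 MCSubtargetInfo const &STI,
                                 MCRegisterInfo const &RI, MCInst &Bundle,
                                 MCInst &DuplexedBundle) {
  HexagonMCChecker Checker(MCII, STI, Bundle, DuplexedBundle, RI);
  bool Ok = Checker.check();           // false if any hard error was queued
  while (Checker.getNextErrInfo()) {   // drain queued errors and warnings
    unsigned Err = Checker.getError();        // CHECK_ERROR_* bit, if any
    unsigned Warn = Checker.getWarning();     // CHECK_WARN_* bit, if any
    unsigned Reg = Checker.getErrRegister();  // offending register, if any
    (void)Err; (void)Warn; (void)Reg;         // e.g., feed a diagnostic here
  }
  (void)Ok;
}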
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 9fc4e2a..c2c6275 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -96,6 +96,12 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
assert(!HexagonMCInstrInfo::isBundle(HMB));
uint64_t Binary;
+ // Compound instructions are limited to using registers 0-7 and 16-23,
+ // so here we map 16-23 down to 8-15 to encode them correctly.
+ static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10,
+ Hexagon::R11, Hexagon::R12, Hexagon::R13,
+ Hexagon::R14, Hexagon::R15};
+
// Pseudo instructions don't get encoded and shouldn't be here
// in the first place!
assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
@@ -104,6 +110,16 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
" `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
"\n");
+ if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) {
+ for (unsigned i = 0; i < HMB.getNumOperands(); ++i)
+ if (HMB.getOperand(i).isReg()) {
+ unsigned Reg =
+ MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg());
+ if ((Reg <= 23) && (Reg >= 16))
+ HMB.getOperand(i).setReg(RegMap[Reg - 16]);
+ }
+ }
+
if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
// Calculate the new value distance to the associated producer
MCOperand &MCO =
@@ -318,7 +334,7 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
// The only relocs left should be GP relative:
default:
if (MCID.mayStore() || MCID.mayLoad()) {
- for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses;
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
++ImpUses) {
if (*ImpUses == Hexagon::GP) {
switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
@@ -389,10 +405,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
return cast<MCConstantExpr>(ME)->getValue();
}
if (MK == MCExpr::Binary) {
- unsigned Res;
- Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
- Res +=
- getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
+ getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
return 0;
}
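The first hunk of this file rewrites the register operands of compound instructions so that R16-R23 end up in encoding slots 8-15 (via RegMap and setReg). The encoding-level effect is small enough to sketch in isolation; remapCompoundEncoding is a hypothetical helper for illustration, not something the emitter defines:

#include <cassert>

// Compound instructions can only encode registers R0-R7 and R16-R23.
// Encodings 16-23 are squeezed into 8-15; 0-7 pass through unchanged.
static unsigned remapCompoundEncoding(unsigned Encoding) {
  if (Encoding >= 16 && Encoding <= 23)
    return Encoding - 8;               // 16 -> 8, ..., 23 -> 15
  assert(Encoding <= 7 && "register not usable in a compound instruction");
  return Encoding;
}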
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 886f8db..d194bea 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -115,8 +115,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
SrcReg = MI.getOperand(1).getReg();
if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) ||
- (MI.getOperand(2).getImm() == -1)))
+ (HexagonMCInstrInfo::inRange<5>(MI, 2) ||
+ HexagonMCInstrInfo::minConstant(MI, 2) == -1))
return HexagonII::HCG_A;
break;
case Hexagon::A2_tfr:
@@ -134,8 +134,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
return false;
// Rd = #u6
DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() <= 63 &&
- MI.getOperand(1).getImm() >= 0 &&
+ if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 &&
+ HexagonMCInstrInfo::minConstant(MI, 1) >= 0 &&
HexagonMCInstrInfo::isIntRegForSubInst(DstReg))
return HexagonII::HCG_A;
break;
@@ -145,9 +145,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
DstReg = MI.getOperand(0).getReg();
Src1Reg = MI.getOperand(1).getReg();
if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
- MI.getOperand(2).isImm() &&
HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
- (MI.getOperand(2).getImm() == 0))
+ HexagonMCInstrInfo::minConstant(MI, 2) == 0)
return HexagonII::HCG_A;
break;
// The fact that .new form is used pretty much guarantees
@@ -206,6 +205,8 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
MCInst *CompoundInsn = 0;
unsigned compoundOpcode;
MCOperand Rs, Rt;
+ int64_t Value;
+ bool Success;
switch (L.getOpcode()) {
default:
@@ -277,7 +278,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
case Hexagon::C2_cmpeqi:
DEBUG(dbgs() << "CX: C2_cmpeqi\n");
- if (L.getOperand(2).getImm() == -1)
+ Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success);
+ if (Value == -1)
compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)];
else
compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)];
@@ -286,14 +290,17 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
CompoundInsn = new (Context) MCInst;
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
- if (L.getOperand(2).getImm() != -1)
+ if (Value != -1)
CompoundInsn->addOperand(L.getOperand(2));
CompoundInsn->addOperand(R.getOperand(1));
break;
case Hexagon::C2_cmpgti:
DEBUG(dbgs() << "CX: C2_cmpgti\n");
- if (L.getOperand(2).getImm() == -1)
+ Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ (void)Success;
+ assert(Success);
+ if (Value == -1)
compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)];
else
compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)];
@@ -302,7 +309,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
CompoundInsn = new (Context) MCInst;
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
- if (L.getOperand(2).getImm() != -1)
+ if (Value != -1)
CompoundInsn->addOperand(L.getOperand(2));
CompoundInsn->addOperand(R.getOperand(1));
break;
@@ -404,7 +411,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
/// additional slot.
void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
MCContext &Context, MCInst &MCI) {
- assert(MCI.getOpcode() == Hexagon::BUNDLE &&
+ assert(HexagonMCInstrInfo::isBundle(MCI) &&
"Non-Bundle where Bundle expected");
// By definition a compound must have 2 insn.
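The hunks above, and the HexagonMCDuplexInfo.cpp hunks that follow, replace the old isImm() plus isUInt/isShiftedUInt operand checks with inRange<Bits, Shift>(MI, Index) and minConstant(MI, Index). minConstant appears later in this diff (it evaluates the operand's expression or returns an out-of-range sentinel); inRange itself is not shown, but judging from the checks it replaces it plausibly behaves like the sketch below, which is an assumption rather than the real header definition:

#include "llvm/MC/MCInst.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Presumed semantics: true iff operand Index is a constant expression whose
// value fits in an unsigned Bits-bit field scaled by 2^Shift, mirroring the
// isShiftedUInt<Bits, Shift>() calls this helper replaces.
template <unsigned Bits, unsigned Shift = 0>
static bool inRangeSketch(MCInst const &MCI, unsigned Index) {
  if (MCI.size() <= Index || !MCI.getOperand(Index).isExpr())
    return false;
  int64_t Value;
  if (!MCI.getOperand(Index).getExpr()->evaluateAsAbsolute(Value))
    return false;                      // Not a compile-time constant.
  return isShiftedUInt<Bits, Shift>(Value);
}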
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 7e9247c..e6194f6 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -26,7 +26,7 @@ using namespace Hexagon;
#define DEBUG_TYPE "hexagon-mcduplex-info"
// pair table of subInstructions with opcodes
-static std::pair<unsigned, unsigned> opcodeData[] = {
+static const std::pair<unsigned, unsigned> opcodeData[] = {
std::make_pair((unsigned)V4_SA1_addi, 0),
std::make_pair((unsigned)V4_SA1_addrx, 6144),
std::make_pair((unsigned)V4_SA1_addsp, 3072),
@@ -81,8 +81,7 @@ static std::pair<unsigned, unsigned> opcodeData[] = {
std::make_pair((unsigned)V4_SS2_storewi1, 4352)};
static std::map<unsigned, unsigned>
- subinstOpcodeMap(opcodeData,
- opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0]));
+ subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData));
bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) {
switch (Ga) {
@@ -195,15 +194,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// Special case this one from Group L2.
// Rd = memw(r29+#u5:2)
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
- if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) {
+ if (HexagonMCInstrInfo::isIntReg(SrcReg) &&
+ Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
// Rd = memw(Rs+#u4:2)
if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- (MCI.getOperand(2).isImm() &&
- isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) {
+ inRange<4, 2>(MCI, 2)) {
return HexagonII::HSIG_L1;
}
}
@@ -214,7 +211,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) {
+ inRange<4>(MCI, 2)) {
return HexagonII::HSIG_L1;
}
break;
@@ -235,8 +232,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) {
+ inRange<3, 1>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
break;
@@ -246,7 +242,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) {
+ inRange<3>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
break;
@@ -256,8 +252,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) {
+ inRange<5, 3>(MCI, 2)) {
return HexagonII::HSIG_L2;
}
break;
@@ -326,15 +321,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isIntReg(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() &&
- isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) {
+ Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) {
return HexagonII::HSIG_S2;
}
// memw(Rs+#u4:2) = Rt
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- MCI.getOperand(1).isImm() &&
- isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) {
+ inRange<4, 2>(MCI, 1)) {
return HexagonII::HSIG_S1;
}
break;
@@ -344,7 +337,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) {
+ inRange<4>(MCI, 1)) {
return HexagonII::HSIG_S1;
}
break;
@@ -363,8 +356,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) &&
- MCI.getOperand(1).isImm() &&
- isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) {
+ inRange<3, 1>(MCI, 1)) {
return HexagonII::HSIG_S2;
}
break;
@@ -374,8 +366,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
Src2Reg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) &&
HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg &&
- MCI.getOperand(1).isImm() &&
- isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) {
+ inSRange<6, 3>(MCI, 1)) {
return HexagonII::HSIG_S2;
}
break;
@@ -383,9 +374,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// memw(Rs+#u4:2) = #U1
Src1Reg = MCI.getOperand(0).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
- MCI.getOperand(1).isImm() &&
- isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) &&
- MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) {
+ inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) {
return HexagonII::HSIG_S2;
}
break;
@@ -393,16 +382,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// memb(Rs+#u4) = #U1
Src1Reg = MCI.getOperand(0).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
- MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) &&
- MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) {
+ inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) {
return HexagonII::HSIG_S2;
}
break;
case Hexagon::S2_allocframe:
- if (MCI.getOperand(0).isImm() &&
- isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) {
+ if (inRange<5, 3>(MCI, 0))
return HexagonII::HSIG_S2;
- }
break;
//
// Group A:
@@ -428,8 +414,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
// Rd = add(r29,#u6:2)
if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg &&
- MCI.getOperand(2).isImm() &&
- isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) {
+ inRange<6, 2>(MCI, 2)) {
return HexagonII::HSIG_A;
}
// Rx = add(Rx,#s7)
@@ -439,8 +424,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// Rd = add(Rs,#1)
// Rd = add(Rs,#-1)
if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
- (MCI.getOperand(2).getImm() == -1))) {
+ (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) {
return HexagonII::HSIG_A;
}
}
@@ -460,8 +444,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) ||
- (MCI.getOperand(2).getImm() == 255))) {
+ (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) {
return HexagonII::HSIG_A;
}
break;
@@ -491,8 +474,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
DstReg = MCI.getOperand(0).getReg(); // Rd
PredReg = MCI.getOperand(1).getReg(); // P0
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) &&
- Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() &&
- MCI.getOperand(2).getImm() == 0) {
+ Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) {
return HexagonII::HSIG_A;
}
break;
@@ -502,7 +484,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (Hexagon::P0 == DstReg &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) {
+ inRange<2>(MCI, 2)) {
return HexagonII::HSIG_A;
}
break;
@@ -511,10 +493,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
// Rdd = combine(#u2,#U2)
DstReg = MCI.getOperand(0).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
- // TODO: Handle Globals/Symbols
- (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) &&
- ((MCI.getOperand(2).isImm() &&
- isUInt<2>(MCI.getOperand(2).getImm())))) {
+ inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) {
return HexagonII::HSIG_A;
}
break;
@@ -524,7 +503,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(1).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) {
+ minConstant(MCI, 2) == 0) {
return HexagonII::HSIG_A;
}
break;
@@ -534,7 +513,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
SrcReg = MCI.getOperand(2).getReg();
if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) &&
HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
- (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) {
+ minConstant(MCI, 1) == 0) {
return HexagonII::HSIG_A;
}
break;
@@ -556,19 +535,17 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
}
bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
-
unsigned DstReg, SrcReg;
-
switch (potentialDuplex.getOpcode()) {
case Hexagon::A2_addi:
// testing for case of: Rx = add(Rx,#s7)
DstReg = potentialDuplex.getOperand(0).getReg();
SrcReg = potentialDuplex.getOperand(1).getReg();
if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
- if (potentialDuplex.getOperand(2).isExpr())
+ int64_t Value;
+ if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value))
return true;
- if (potentialDuplex.getOperand(2).isImm() &&
- !(isShiftedInt<7, 0>(potentialDuplex.getOperand(2).getImm())))
+ if (!isShiftedInt<7, 0>(Value))
return true;
}
break;
@@ -576,15 +553,14 @@ bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
DstReg = potentialDuplex.getOperand(0).getReg();
if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) {
- if (potentialDuplex.getOperand(1).isExpr())
+ int64_t Value;
+ if (!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value))
return true;
// Check for case of Rd = #-1.
- if (potentialDuplex.getOperand(1).isImm() &&
- (potentialDuplex.getOperand(1).getImm() == -1))
+ if (Value == -1)
return false;
// Check for case of Rd = #u6.
- if (potentialDuplex.getOperand(1).isImm() &&
- !isShiftedUInt<6, 0>(potentialDuplex.getOperand(1).getImm()))
+ if (!isShiftedUInt<6, 0>(Value))
return true;
}
break;
@@ -712,19 +688,23 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst,
MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
MCInst Result;
+ bool Absolute;
+ int64_t Value;
switch (Inst.getOpcode()) {
default:
// dbgs() << "opcode: "<< Inst->getOpcode() << "\n";
llvm_unreachable("Unimplemented subinstruction \n");
break;
case Hexagon::A2_addi:
- if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) {
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 1) {
Result.setOpcode(Hexagon::V4_SA1_inc);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break;
} // 1,2 SUBInst $Rd = add($Rs, #1)
- else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) {
+ else if (Value == -1) {
Result.setOpcode(Hexagon::V4_SA1_dec);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -754,7 +734,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 0);
break; // 1 SUBInst allocframe(#$u5_3)
case Hexagon::A2_andir:
- if (Inst.getOperand(2).getImm() == 255) {
+ if (minConstant(Inst, 2) == 255) {
Result.setOpcode(Hexagon::V4_SA1_zxtb);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -772,26 +752,27 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2)
case Hexagon::A4_combineii:
case Hexagon::A2_combineii:
- if (Inst.getOperand(1).getImm() == 1) {
+ Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 1) {
Result.setOpcode(Hexagon::V4_SA1_combine1i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#1, #$u2)
}
-
- if (Inst.getOperand(1).getImm() == 3) {
+ if (Value == 3) {
Result.setOpcode(Hexagon::V4_SA1_combine3i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#3, #$u2)
}
- if (Inst.getOperand(1).getImm() == 0) {
+ if (Value == 0) {
Result.setOpcode(Hexagon::V4_SA1_combine0i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#0, #$u2)
}
- if (Inst.getOperand(1).getImm() == 2) {
+ if (Value == 2) {
Result.setOpcode(Hexagon::V4_SA1_combine2i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
@@ -894,12 +875,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2)
}
case Hexagon::S4_storeirb_io:
- if (Inst.getOperand(2).getImm() == 0) {
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 0) {
Result.setOpcode(Hexagon::V4_SS2_storebi0);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0
- } else if (Inst.getOperand(2).getImm() == 1) {
+ } else if (Value == 1) {
Result.setOpcode(Hexagon::V4_SS2_storebi1);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -923,12 +906,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt
case Hexagon::S4_storeiri_io:
- if (Inst.getOperand(2).getImm() == 0) {
+ Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+ assert(Absolute);(void)Absolute;
+ if (Value == 0) {
Result.setOpcode(Hexagon::V4_SS2_storewi0);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0
- } else if (Inst.getOperand(2).getImm() == 1) {
+ } else if (Value == 1) {
Result.setOpcode(Hexagon::V4_SS2_storewi1);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
@@ -983,7 +968,8 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 0);
break; // 2 SUBInst if (p0) $Rd = #0
case Hexagon::A2_tfrsi:
- if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) {
+ Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+ if (Absolute && Value == -1) {
Result.setOpcode(Hexagon::V4_SA1_setin1);
addOps(Result, Inst, 0);
break; // 2 1 SUBInst $Rd = #-1
@@ -1044,6 +1030,8 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
<< "\n");
bisReversable = false;
}
+ if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf
+ bisReversable = false;
// Try in order.
if (isOrderedDuplexPair(
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index bf51c35..eaa3550 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -37,9 +37,7 @@ static cl::opt<unsigned>
void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
const MCSubtargetInfo &STI) {
- MCInst HMI;
- HMI.setOpcode(Hexagon::BUNDLE);
- HMI.addOperand(MCOperand::createImm(0));
+ MCInst HMI = HexagonMCInstrInfo::createBundle();
MCInst *MCB;
if (MCK.getOpcode() != Hexagon::BUNDLE) {
@@ -50,7 +48,7 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
// Examines packet and pad the packet, if needed, when an
// end-loop is in the bundle.
- HexagonMCInstrInfo::padEndloop(*MCB);
+ HexagonMCInstrInfo::padEndloop(getContext(), *MCB);
HexagonMCShuffle(*MCII, STI, *MCB);
assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
@@ -60,9 +58,9 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
if (Extended) {
if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst());
- HexagonMCInstrInfo::clampExtended(*MCII, *SubInst);
+ HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst);
} else {
- HexagonMCInstrInfo::clampExtended(*MCII, *MCI);
+ HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI);
}
Extended = false;
} else {
@@ -114,7 +112,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
MCSection *Section = getAssembler().getContext().getELFSection(
SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
SwitchSection(Section);
- AssignSection(Symbol, Section);
+ AssignFragment(Symbol, getCurrentFragment());
MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment);
SwitchSection(CrntSection);
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
new file mode 100644
index 0000000..fc62626
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -0,0 +1,49 @@
+//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-mcexpr"
+
+HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) HexagonNoExtendOperand(Expr);
+}
+
+bool HexagonNoExtendOperand::evaluateAsRelocatableImpl(
+ MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const {
+ return Expr->evaluateAsRelocatable(Res, Layout, Fixup);
+}
+
+void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {}
+
+MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const {
+ return Expr->findAssociatedFragment();
+}
+
+void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
+MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; }
+
+bool HexagonNoExtendOperand::classof(MCExpr const *E) {
+ return E->getKind() == MCExpr::Target;
+}
+
+HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr)
+ : Expr(Expr) {}
+
+void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ Expr->print(OS, MAI);
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
new file mode 100644
index 0000000..60f180f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -0,0 +1,35 @@
+//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCInst;
+class HexagonNoExtendOperand : public MCTargetExpr {
+public:
+ static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx);
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ MCFragment *findAssociatedFragment() const override;
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
+ static bool classof(MCExpr const *E);
+ MCExpr const *getExpr() const;
+
+private:
+ HexagonNoExtendOperand(MCExpr const *Expr);
+ MCExpr const *Expr;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
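HexagonNoExtendOperand is a target-specific MCExpr wrapper that forwards evaluation to the expression it wraps; its name suggests it marks an operand whose expression should not get a constant extender. A minimal sketch of wrapping an operand this way, with makeNoExtendOperand as a hypothetical helper and the surrounding context assumed rather than taken from this patch:

#include "MCTargetDesc/HexagonMCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"

using namespace llvm;

// Wrap Expr so later passes can detect (via classof/getExpr) that this
// operand was explicitly marked as not needing a constant extender.
static MCOperand makeNoExtendOperand(MCExpr const *Expr, MCContext &Ctx) {
  HexagonNoExtendOperand *Wrapped = HexagonNoExtendOperand::Create(Expr, Ctx);
  return MCOperand::createExpr(Wrapped);
}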
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 48b15f8..e684207 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -15,17 +15,37 @@
#include "Hexagon.h"
#include "HexagonBaseInfo.h"
+#include "HexagonMCChecker.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
+void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value,
+ MCContext &Context) {
+ MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context)));
+}
+
+void HexagonMCInstrInfo::addConstExtender(MCContext &Context,
+ MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI) {
+ assert(HexagonMCInstrInfo::isBundle(MCB));
+ MCOperand const &exOp =
+ MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
+
+ // Create the extender.
+ MCInst *XMCI =
+ new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp));
+
+ MCB.addOperand(MCOperand::createInst(XMCI));
+}
+
iterator_range<MCInst::const_iterator>
HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
assert(isBundle(MCI));
- return iterator_range<MCInst::const_iterator>(
- MCI.begin() + bundleInstructionsOffset, MCI.end());
+ return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end());
}
size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
@@ -35,7 +55,40 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
return (1);
}
-void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
+bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Check) {
+ // Examine the packet and convert pairs of instructions to compound
+ // instructions when possible.
+ if (!HexagonDisableCompound)
+ HexagonMCInstrInfo::tryCompound(MCII, Context, MCB);
+ // Check the bundle for errors.
+ bool CheckOk = Check ? Check->check() : true;
+ if (!CheckOk)
+ return false;
+ HexagonMCShuffle(MCII, STI, MCB);
+ // Examine the packet and convert pairs of instructions to duplex
+ // instructions when possible.
+ MCInst InstBundlePreDuplex = MCInst(MCB);
+ if (!HexagonDisableDuplex) {
+ SmallVector<DuplexCandidate, 8> possibleDuplexes;
+ possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB);
+ HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
+ }
+ // Examine the packet and pad it, if needed, when an
+ // end-loop is in the bundle.
+ HexagonMCInstrInfo::padEndloop(Context, MCB);
+ // If compounding and duplexing didn't reduce the size to
+ // 4 or fewer instructions, we have a packet that is too big.
+ if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
+ return false;
+ HexagonMCShuffle(MCII, STI, MCB);
+ return true;
+}
+
+void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII,
+ MCContext &Context, MCInst &MCI) {
assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
HexagonMCInstrInfo::isExtended(MCII, MCI));
MCOperand &exOp =
@@ -43,13 +96,20 @@ void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
// If the extended value is a constant, then use it for the extended and
// for the extender instructions, masking off the lower 6 bits and
// including the assumed bits.
- if (exOp.isImm()) {
+ int64_t Value;
+ if (exOp.getExpr()->evaluateAsAbsolute(Value)) {
unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI);
- int64_t Bits = exOp.getImm();
- exOp.setImm((Bits & 0x3f) << Shift);
+ exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context));
}
}
+MCInst HexagonMCInstrInfo::createBundle() {
+ MCInst Result;
+ Result.setOpcode(Hexagon::BUNDLE);
+ Result.addOperand(MCOperand::createImm(0));
+ return Result;
+}
+
MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
MCInst const &inst0,
MCInst const &inst1) {
@@ -64,6 +124,27 @@ MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
return duplexInst;
}
+MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
+ MCInst const &Inst,
+ MCOperand const &MO) {
+ assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) ||
+ HexagonMCInstrInfo::isExtended(MCII, Inst));
+
+ MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
+ MCInst XMI;
+ XMI.setOpcode((Desc.isBranch() || Desc.isCall() ||
+ HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR)
+ ? Hexagon::A4_ext_b
+ : Hexagon::A4_ext);
+ if (MO.isImm())
+ XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f)));
+ else if (MO.isExpr())
+ XMI.addOperand(MCOperand::createExpr(MO.getExpr()));
+ else
+ llvm_unreachable("invalid extendable operand");
+ return XMI;
+}
+
MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
size_t Index) {
assert(Index <= bundleSize(MCB));
@@ -76,6 +157,13 @@ MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
return nullptr;
}
+void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context,
+ MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI, bool MustExtend) {
+ if (isConstExtended(MCII, MCI) || MustExtend)
+ addConstExtender(Context, MCII, MCB, MCI);
+}
+
HexagonII::MemAccessSize
HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -186,6 +274,25 @@ MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
return (MCO);
}
+/// Return the operand index of the second new or newly produced value.
+unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2);
+}
+
+MCOperand const &
+HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI);
+ MCOperand const &MCO = MCI.getOperand(O);
+
+ assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
+ HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) &&
+ MCO.isReg());
+ return (MCO);
+}
+
int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -242,6 +349,13 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask);
}
+/// Return whether the insn produces a second value.
+bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2);
+}
+
MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) {
assert(isBundle(MCB));
assert(Index < HEXAGON_PACKET_SIZE);
@@ -261,6 +375,11 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
}
+bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND);
+}
+
bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
(Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
@@ -282,14 +401,21 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
MCInst const &MCI) {
if (HexagonMCInstrInfo::isExtended(MCII, MCI))
return true;
-
- if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
+ // Branch insns are handled as necessary by relaxation.
+ if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) ||
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) ||
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV &&
+ HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()))
+ return false;
+ // Otherwise loop instructions and other CR insts are handled by relaxation
+ else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) &&
+ (MCI.getOpcode() != Hexagon::C4_addipc))
+ return false;
+ else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI))
return false;
- short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI);
- int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
- int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
- MCOperand const &MO = MCI.getOperand(ExtOpNum);
+ MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI);
// We could be using an instruction with an extendable immediate and shoehorn
// a global address into it. If it is a global address it will be constant
@@ -297,15 +423,13 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
// We currently only handle isGlobal() because it is the only kind of
// object we are going to end up with here for now.
// In the future we probably should add isSymbol(), etc.
- if (MO.isExpr())
+ assert(!MO.isImm());
+ int64_t Value;
+ if (!MO.getExpr()->evaluateAsAbsolute(Value))
return true;
-
- // If the extendable operand is not 'Immediate' type, the instruction should
- // have 'isExtended' flag set.
- assert(MO.isImm() && "Extendable operand must be Immediate type");
-
- int ImmValue = MO.getImm();
- return (ImmValue < MinValue || ImmValue > MaxValue);
+ int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+ int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+ return (MinValue > Value || Value > MaxValue);
}
bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
@@ -374,6 +498,19 @@ bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
+bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::PredicateLatePos & HexagonII::PredicateLateMask);
+}
+
+/// Return whether the insn is newly predicated.
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -394,6 +531,18 @@ bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
}
+bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ auto Flags = MCI.getOperand(0).getImm();
+ return (Flags & memReorderDisabledMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) {
+ assert(isBundle(MCI));
+ auto Flags = MCI.getOperand(0).getImm();
+ return (Flags & memStoreReorderEnabledMask) != 0;
+}
+
bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
@@ -405,7 +554,28 @@ bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
}
-void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
+bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
+ if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
+ (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
+ return true;
+ return false;
+}
+
+int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
+ auto Sentinel = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
+ << 8;
+ if (MCI.size() <= Index)
+ return Sentinel;
+ MCOperand const &MCO = MCI.getOperand(Index);
+ if (!MCO.isExpr())
+ return Sentinel;
+ int64_t Value;
+ if (!MCO.getExpr()->evaluateAsAbsolute(Value))
+ return Sentinel;
+ return Value;
+}
+
+void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
MCInst Nop;
Nop.setOpcode(Hexagon::A2_nop);
assert(isBundle(MCB));
@@ -413,7 +583,7 @@ void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
(HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
((HexagonMCInstrInfo::isOuterLoop(MCB) &&
(HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
- MCB.addOperand(MCOperand::createInst(new MCInst(Nop)));
+ MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop)));
}
bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
@@ -456,6 +626,20 @@ void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
Operand.setImm(Operand.getImm() | innerLoopMask);
}
+void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | memReorderDisabledMask);
+ assert(isMemReorderDisabled(MCI));
+}
+
+void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
+ assert(isBundle(MCI));
+ MCOperand &Operand = MCI.getOperand(0);
+ Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
+ assert(isMemStoreReorderEnabled(MCI));
+}
+
void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
assert(isBundle(MCI));
MCOperand &Operand = MCI.getOperand(0);
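The setters and predicates added to HexagonMCInstrInfo.cpp above all share one mechanism: operand 0 of a bundle MCInst is an immediate whose low bits serve as packet-level flags (inner/outer loop, memory-reorder-disabled, store-reorder-enabled), so each setter ORs a mask into that immediate and each predicate tests it. A minimal sketch of that pattern, using a hypothetical flag mask that is not part of this patch:

    #include "llvm/MC/MCInst.h"

    // Illustrative only: the real masks live in HexagonMCInstrInfo.h.
    static const int64_t hypotheticalFlagMask = 1 << 4;

    static void setHypotheticalFlag(llvm::MCInst &Bundle) {
      llvm::MCOperand &Header = Bundle.getOperand(0); // bundle header immediate
      Header.setImm(Header.getImm() | hypotheticalFlagMask);
    }

    static bool hasHypotheticalFlag(llvm::MCInst const &Bundle) {
      return (Bundle.getOperand(0).getImm() & hypotheticalFlagMask) != 0;
    }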
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 32d61a4..0237b28 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -14,9 +14,11 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
+#include "HexagonMCExpr.h"
#include "llvm/MC/MCInst.h"
namespace llvm {
+class HexagonMCChecker;
class MCContext;
class MCInstrDesc;
class MCInstrInfo;
@@ -39,20 +41,47 @@ int64_t const innerLoopMask = 1 << innerLoopOffset;
size_t const outerLoopOffset = 1;
int64_t const outerLoopMask = 1 << outerLoopOffset;
+// When set, memory loads/stores in this packet must not be reordered;
+// by default, loads and stores may be reordered.
+size_t const memReorderDisabledOffset = 2;
+int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset;
+
+// When set, allow reordering of memory stores; by default, stores cannot be reordered.
+size_t const memStoreReorderEnabledOffset = 3;
+int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset;
+
size_t const bundleInstructionsOffset = 1;
+void addConstant(MCInst &MI, uint64_t Value, MCContext &Context);
+void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI);
+
// Returns an iterator range of instructions in this bundle
iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
// Returns the number of instructions in the bundle
size_t bundleSize(MCInst const &MCI);
+// Put the packet into canonical form: compound, duplex, pad, and shuffle
+bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCB,
+ HexagonMCChecker *Checker);
+
// Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCInst &MCI);
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI, bool MustExtend);
// Create a duplex instruction given the two subinsts
MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
MCInst const &inst1);
+MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
+ MCOperand const &MO);
// Convert this instruction in to a duplex subinst
MCInst deriveSubInst(MCInst const &Inst);
@@ -108,6 +137,9 @@ unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the operand that consumes or produces a new value.
MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
+ MCInst const &MCI);
int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -125,6 +157,7 @@ bool hasImmExt(MCInst const &MCI);
// Return whether the instruction is a legal new-value producer.
bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the instruction at Index
MCInst const &instruction(MCInst const &MCB, size_t Index);
@@ -134,10 +167,24 @@ bool isBundle(MCInst const &MCI);
// Return whether the insn is an actual insn.
bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the duplex iclass given the two duplex classes
unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
+int64_t minConstant(MCInst const &MCI, size_t Index);
+template <unsigned N, unsigned S>
+bool inRange(MCInst const &MCI, size_t Index) {
+ return isShiftedUInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N, unsigned S>
+bool inSRange(MCInst const &MCI, size_t Index) {
+ return isShiftedInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
+ return isUInt<N>(minConstant(MCI, Index));
+}
+
// Return whether the instruction needs to be constant extended.
bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -173,6 +220,8 @@ bool isIntReg(unsigned Reg);
// Is this register suitable for use in a duplex subinst
bool isIntRegForSubInst(unsigned Reg);
+bool isMemReorderDisabled(MCInst const &MCI);
+bool isMemStoreReorderEnabled(MCInst const &MCI);
// Return whether the insn is a new-value consumer.
bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -191,6 +240,8 @@ bool isOuterLoop(MCInst const &MCI);
// Return whether this instruction is predicated
bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI);
// Return whether the predicate sense is true
bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -209,9 +260,10 @@ bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI);
/// Return whether the insn can be packaged only with an A-type insn in slot #1.
bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isVector(MCInstrInfo const &MCII, MCInst const &MCI);
// Pad the bundle with nops to satisfy endloop requirements
-void padEndloop(MCInst &MCI);
+void padEndloop(MCContext &Context, MCInst &MCI);
bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -220,6 +272,8 @@ void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
// Marks a bundle as endloop0
void setInnerLoop(MCInst &MCI);
+void setMemReorderDisabled(MCInst &MCI);
+void setMemStoreReorderEnabled(MCInst &MCI);
// Marks a bundle as endloop1
void setOuterLoop(MCInst &MCI);
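The inRange/inSRange/inRange&lt;N&gt; templates declared above fold two checks into one: minConstant() yields either the operand's absolute value or a sentinel far outside any encodable immediate, so a single isShiftedUInt/isShiftedInt call rejects both non-constant and out-of-range operands. A standalone sketch of that idea, assuming only llvm/Support/MathExtras.h (the field width below is hypothetical):

    #include <cstdint>
    #include <limits>
    #include "llvm/Support/MathExtras.h"

    // Sentinel larger than any 32-bit immediate, as minConstant() returns above.
    static int64_t sentinel() {
      return static_cast<int64_t>(std::numeric_limits<uint32_t>::max()) << 8;
    }

    // Hypothetical field: 6 unsigned bits scaled by 4 (shift of 2). The sentinel,
    // like any other out-of-range value, makes this return false.
    static bool fitsU6Shift2(int64_t Value) {
      return llvm::isShiftedUInt<6, 2>(static_cast<uint64_t>(Value));
    }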
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 53305d8..9a29257 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -40,6 +40,20 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "HexagonGenRegisterInfo.inc"
+cl::opt<bool> llvm::HexagonDisableCompound
+ ("mno-compound",
+ cl::desc("Disable looking for compound instructions for Hexagon"));
+
+cl::opt<bool> llvm::HexagonDisableDuplex
+ ("mno-pairing",
+ cl::desc("Disable looking for duplex instructions for Hexagon"));
+
+StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
+ if (CPU.empty())
+ CPU = "hexagonv60";
+ return CPU;
+}
+
MCInstrInfo *llvm::createHexagonMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitHexagonMCInstrInfo(X);
@@ -54,6 +68,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU);
return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
}
@@ -76,28 +91,23 @@ public:
StringRef Contents(Buffer);
auto PacketBundle = Contents.rsplit('\n');
auto HeadTail = PacketBundle.first.split('\n');
- auto Preamble = "\t{\n\t\t";
- auto Separator = "";
- while(!HeadTail.first.empty()) {
- OS << Separator;
- StringRef Inst;
+ StringRef Separator = "\n";
+ StringRef Indent = "\t\t";
+ OS << "\t{\n";
+ while (!HeadTail.first.empty()) {
+ StringRef InstTxt;
auto Duplex = HeadTail.first.split('\v');
- if(!Duplex.second.empty()){
- OS << Duplex.first << "\n";
- Inst = Duplex.second;
- }
- else {
- if(!HeadTail.first.startswith("immext"))
- Inst = Duplex.first;
+ if (!Duplex.second.empty()) {
+ OS << Indent << Duplex.first << Separator;
+ InstTxt = Duplex.second;
+ } else if (!HeadTail.first.trim().startswith("immext")) {
+ InstTxt = Duplex.first;
}
- OS << Preamble;
- OS << Inst;
+ if (!InstTxt.empty())
+ OS << Indent << InstTxt << Separator;
HeadTail = HeadTail.second.split('\n');
- Preamble = "";
- Separator = "\n\t\t";
}
- if(HexagonMCInstrInfo::bundleSize(Inst) != 0)
- OS << "\n\t}" << PacketBundle.second;
+ OS << "\t}" << PacketBundle.second;
}
};
}
@@ -154,9 +164,9 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- // For the time being, use static relocations, since there's really no
- // support for PIC yet.
- X->initMCCodeGenInfo(Reloc::Static, CM, OL);
+ if (RM == Reloc::Default)
+ RM = Reloc::Static;
+ X->initMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index cb62650..a005a01 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -16,6 +16,8 @@
#include <cstdint>
+#include "llvm/Support/CommandLine.h"
+
namespace llvm {
struct InstrItinerary;
struct InstrStage;
@@ -33,22 +35,27 @@ class raw_ostream;
class raw_pwrite_stream;
extern Target TheHexagonTarget;
-
+extern cl::opt<bool> HexagonDisableCompound;
+extern cl::opt<bool> HexagonDisableDuplex;
extern const InstrStage HexagonStages[];
MCInstrInfo *createHexagonMCInstrInfo();
-MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII,
- MCRegisterInfo const &MRI,
+MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
MCContext &MCT);
-MCAsmBackend *createHexagonAsmBackend(Target const &T,
- MCRegisterInfo const &MRI,
+MCAsmBackend *createHexagonAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU);
MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, StringRef CPU);
+namespace HEXAGON_MC {
+ StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
+}
+
} // End llvm namespace
// Define symbolic names for Hexagon registers. This defines a mapping from
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 41112ac..6ceb848 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -27,6 +27,7 @@
using namespace llvm;
+namespace {
// Insn shuffling priority.
class HexagonBid {
// The priority is directly proportional to how restricted the insn is based
@@ -75,6 +76,7 @@ public:
return false;
};
};
+} // end anonymous namespace
unsigned HexagonResource::setWeight(unsigned s) {
const unsigned SlotWeight = 8;
@@ -93,6 +95,60 @@ unsigned HexagonResource::setWeight(unsigned s) {
return (Weight);
}
+HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL;
+
+bool HexagonCVIResource::SetUp = HexagonCVIResource::setup();
+
+bool HexagonCVIResource::setup() {
+ assert(!TUL);
+ TUL = new (TypeUnitsAndLanes);
+
+ (*TUL)[HexagonII::TypeCVI_VA] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
+ (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2);
+ (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
+ (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_LD] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
+ (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_ST] =
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
+ (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0);
+ (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1);
+ (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4);
+
+ return true;
+}
+
+HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s,
+ MCInst const *id)
+ : HexagonResource(s) {
+ unsigned T = HexagonMCInstrInfo::getType(MCII, *id);
+
+ if (TUL->count(T)) {
+ // For an HVX insn.
+ Valid = true;
+ setUnits((*TUL)[T].first);
+ setLanes((*TUL)[T].second);
+ setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad());
+ setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore());
+ } else {
+ // For core insns.
+ Valid = false;
+ setUnits(0);
+ setLanes(0);
+ setLoad(false);
+ setStore(false);
+ }
+}
+
HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI)
: MCII(MCII), STI(STI) {
@@ -107,7 +163,7 @@ void HexagonShuffler::reset() {
void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
unsigned S, bool X) {
- HexagonInstr PI(ID, Extender, S, X);
+ HexagonInstr PI(MCII, ID, Extender, S, X);
Packet.push_back(PI);
}
@@ -126,6 +182,8 @@ bool HexagonShuffler::check() {
// Number of memory operations, loads, solo loads, stores, solo stores, single
// stores.
unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
+ // Number of HVX loads, HVX stores.
+ unsigned CVIloads = 0, CVIstores = 0;
// Number of duplex insns, solo insns.
unsigned duplex = 0, solo = 0;
// Number of insns restricting other insns in the packet to A and X types,
@@ -168,6 +226,12 @@ bool HexagonShuffler::check() {
case HexagonII::TypeJ:
++jumps;
break;
+ case HexagonII::TypeCVI_VM_VP_LDU:
+ ++onlyNo1;
+ case HexagonII::TypeCVI_VM_LD:
+ case HexagonII::TypeCVI_VM_TMP_LD:
+ case HexagonII::TypeCVI_VM_CUR_LD:
+ ++CVIloads;
case HexagonII::TypeLD:
++loads;
++memory;
@@ -176,6 +240,11 @@ bool HexagonShuffler::check() {
if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
break;
+ case HexagonII::TypeCVI_VM_STU:
+ ++onlyNo1;
+ case HexagonII::TypeCVI_VM_ST:
+ case HexagonII::TypeCVI_VM_NEW_ST:
+ ++CVIstores;
case HexagonII::TypeST:
++stores;
++memory;
@@ -203,9 +272,9 @@ bool HexagonShuffler::check() {
}
// Check if the packet is legal.
- if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) ||
- (solo && size() > 1) || (onlyAX && neitherAnorX > 1) ||
- (onlyAX && xtypeFloat)) {
+ if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
+ (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
+ (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
Error = SHUFFLE_ERROR_INVALID;
return false;
}
@@ -336,6 +405,19 @@ bool HexagonShuffler::check() {
return false;
}
}
+ // Verify the CVI slot subscriptions.
+ {
+ HexagonUnitAuction AuctionCVI;
+
+ std::sort(begin(), end(), HexagonInstr::lessCVI);
+
+ for (iterator I = begin(); I != end(); ++I)
+ for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
+ if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
+ }
Error = SHUFFLE_SUCCESS;
return true;
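The CVI slot check added above sorts the packet by HVX units and then auctions execution units: an insn that needs N adjacent lanes bids its unit mask once per lane, shifted by the lane index, and the packet is rejected as soon as any bid cannot be satisfied. A simplified, self-contained model of that auction (the real HexagonUnitAuction differs in detail):

    #include <cstdint>

    struct UnitAuction {
      uint32_t Taken = 0; // unit bits already claimed by earlier insns
      // Claim one still-free unit out of Units; fail if none is free.
      bool bid(uint32_t Units) {
        uint32_t Free = Units & ~Taken;
        if (!Free)
          return false;
        Taken |= Free & (~Free + 1); // take the lowest free unit bit
        return true;
      }
    };

    // Mirrors the loop in HexagonShuffler::check(): bid once per required lane.
    static bool subscribe(UnitAuction &A, uint32_t Units, unsigned Lanes) {
      for (unsigned i = 0; i < Lanes; ++i)
        if (!A.bid(Units << i))
          return false;
      return true;
    }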
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 8b6c72e..174f10f 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -51,6 +51,44 @@ public:
};
};
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource {
+ typedef std::pair<unsigned, unsigned> UnitsAndLanes;
+ typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+
+ // Available HVX slots.
+ enum {
+ CVI_NONE = 0,
+ CVI_XLANE = 1 << 0,
+ CVI_SHIFT = 1 << 1,
+ CVI_MPY0 = 1 << 2,
+ CVI_MPY1 = 1 << 3
+ };
+
+ static bool SetUp;
+ static bool setup();
+ static TypeUnitsAndLanes *TUL;
+
+ // Count of adjacent slots that the insn requires in order to execute.
+ unsigned Lanes;
+ // Flag whether the insn is a load or a store.
+ bool Load, Store;
+ // Flag whether the HVX resources are valid.
+ bool Valid;
+
+ void setLanes(unsigned l) { Lanes = l; };
+ void setLoad(bool f = true) { Load = f; };
+ void setStore(bool f = true) { Store = f; };
+
+public:
+ HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+
+ bool isValid() const { return (Valid); };
+ unsigned getLanes() const { return (Lanes); };
+ bool mayLoad() const { return (Load); };
+ bool mayStore() const { return (Store); };
+};
+
// Handle to an insn used by the shuffling algorithm.
class HexagonInstr {
friend class HexagonShuffler;
@@ -58,12 +96,14 @@ class HexagonInstr {
MCInst const *ID;
MCInst const *Extender;
HexagonResource Core;
+ HexagonCVIResource CVI;
bool SoloException;
public:
- HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s,
- bool x = false)
- : ID(id), Extender(Extender), Core(s), SoloException(x){};
+ HexagonInstr(MCInstrInfo const &MCII, MCInst const *id,
+ MCInst const *Extender, unsigned s, bool x = false)
+ : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id),
+ SoloException(x){};
MCInst const *getDesc() const { return (ID); };
@@ -79,6 +119,10 @@ public:
static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) {
return (HexagonResource::lessUnits(A.Core, B.Core));
};
+ // Check if the handles are in ascending order by HVX slots.
+ static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) {
+ return (HexagonResource::lessUnits(A.CVI, B.CVI));
+ };
};
// Bundle shuffler.
@@ -108,6 +152,8 @@ public:
SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns.
SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns.
SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots.
+ SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60).
+ SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< Store/load conflict.
SHUFFLE_ERROR_UNKNOWN ///< Unknown error.
};
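HexagonCVIResource above populates its type-to-units table exactly once via the "static bool SetUp = setup()" idiom: the dynamic initializer of the static data member runs setup() before main(), allocating and filling the heap-resident map. A minimal standalone illustration of the idiom (all names here are hypothetical, not the patch's):

    #include <map>

    struct ResourceTable {
      typedef std::map<unsigned, unsigned> TableTy;
      static TableTy *Table; // allocated and filled once, before main()
      static bool SetUp;
      static bool setup() {
        Table = new TableTy;
        (*Table)[0] = 1; // illustrative entry
        return true;
      }
    };

    ResourceTable::TableTy *ResourceTable::Table;
    bool ResourceTable::SetUp = ResourceTable::setup();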
diff --git a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 70141a9..72afec1 100644
--- a/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/contrib/llvm/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -17,8 +17,6 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
- class MCOperand;
-
class MSP430InstPrinter : public MCInstPrinter {
public:
MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index ff5b0b6..183dee3 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -17,13 +17,14 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
+class Triple;
- class MSP430MCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit MSP430MCAsmInfo(const Triple &TT);
- };
+class MSP430MCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit MSP430MCAsmInfo(const Triple &TT);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
index ffcf222..606abc2 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -64,7 +64,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
unsigned FuncSize = 0;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
- MachineBasicBlock *MBB = MFI;
+ MachineBasicBlock *MBB = &*MFI;
unsigned BlockSize = 0;
for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 29bc8b3..18f38b7 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -69,10 +69,6 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
computeRegisterProperties(STI.getRegisterInfo());
// Provide all sorts of operation actions
-
- // Division is expensive
- setIntDivIsCheap(false);
-
setStackPointerRegisterToSaveRestore(MSP430::SP);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
@@ -508,9 +504,10 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
- InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ InVal = DAG.getLoad(
+ VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
}
InVals.push_back(InVal);
@@ -1231,8 +1228,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
}
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
+ MachineFunction::iterator I = ++BB->getIterator();
// Create loop block
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -1320,8 +1316,7 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
+ MachineFunction::iterator I = ++BB->getIterator();
// thisMBB:
// ...
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 72b1780..d4f82bd 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -44,11 +44,10 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
@@ -72,11 +71,10 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index 54154a8..47b0e27 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -50,9 +50,9 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetJumpTableSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout &DL = Printer.getDataLayout();
SmallString<256> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI"
<< Printer.getFunctionNumber() << '_'
<< MO.getIndex();
@@ -67,9 +67,9 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
- const DataLayout *DL = Printer.TM.getDataLayout();
+ const DataLayout &DL = Printer.getDataLayout();
SmallString<256> Name;
- raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI"
+ raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "CPI"
<< Printer.getFunctionNumber() << '_'
<< MO.getIndex();
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
index 0f75399..b442fc0 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430MachineFuctionInfo.cpp - MSP430 machine function info -------===//
+//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index fcc5f5b..2d93731 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===- MSP430MachineFuctionInfo.h - MSP430 machine function info -*- C++ -*-==//
+//=== MSP430MachineFunctionInfo.h - MSP430 machine function info -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 5107d2a..d4e061f 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -11,6 +11,7 @@
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "MipsRegisterInfo.h"
+#include "MipsTargetObjectFile.h"
#include "MipsTargetStreamer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
@@ -106,7 +107,6 @@ class MipsAsmParser : public MCTargetAsmParser {
return static_cast<MipsTargetStreamer &>(TS);
}
- MCSubtargetInfo &STI;
MipsABIInfo ABI;
SmallVector<std::unique_ptr<MipsAssemblerOptions>, 2> AssemblerOptions;
MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a
@@ -114,6 +114,12 @@ class MipsAsmParser : public MCTargetAsmParser {
// selected. This usually happens after an '.end func'
// directive.
bool IsLittleEndian;
+ bool IsPicEnabled;
+ bool IsCpRestoreSet;
+ int CpRestoreOffset;
+ unsigned CpSaveLocation;
+ /// If true, then CpSaveLocation is a register, otherwise it's an offset.
+ bool CpSaveLocationIsRegister;
// Print a warning along with its fix-it message at the given range.
void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg,
@@ -141,50 +147,41 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID) override;
- MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
+ OperandMatchResultTy parseMemOperand(OperandVector &Operands);
+ OperandMatchResultTy
matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
StringRef Identifier, SMLoc S);
-
- MipsAsmParser::OperandMatchResultTy
- matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
-
- MipsAsmParser::OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseImm(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy parseLSAImm(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
- parseRegisterPair (OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
- parseMovePRegPair(OperandVector &Operands);
-
- MipsAsmParser::OperandMatchResultTy
- parseRegisterList (OperandVector &Operands);
+ OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
+ SMLoc S);
+ OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
+ OperandMatchResultTy parseImm(OperandVector &Operands);
+ OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
+ OperandMatchResultTy parseInvNum(OperandVector &Operands);
+ OperandMatchResultTy parseLSAImm(OperandVector &Operands);
+ OperandMatchResultTy parseRegisterPair(OperandVector &Operands);
+ OperandMatchResultTy parseMovePRegPair(OperandVector &Operands);
+ OperandMatchResultTy parseRegisterList(OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
bool parseOperand(OperandVector &, StringRef Mnemonic);
- bool needsExpansion(MCInst &Inst);
+ enum MacroExpanderResultTy {
+ MER_NotAMacro,
+ MER_Success,
+ MER_Fail,
+ };
// Expands assembly pseudo instructions.
- // Returns false on success, true otherwise.
- bool expandInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ MacroExpanderResultTy
+ tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
bool expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
bool loadImmediate(int64_t ImmValue, unsigned DstReg, unsigned SrcReg,
- bool Is32BitImm, SMLoc IDLoc,
+ bool Is32BitImm, bool IsAddress, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned DstReg,
@@ -194,11 +191,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- bool expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ bool expandLoadAddress(unsigned DstReg, unsigned BaseReg,
+ const MCOperand &Offset, bool Is32BitAddress,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions);
- bool expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -209,24 +205,43 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool expandBranchImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
bool expandCondBranches(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- bool expandUlhu(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions);
+ bool expandDiv(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions, const bool IsMips64,
+ const bool Signed);
+
+ bool expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
bool expandUlw(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+ bool expandRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ bool expandRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ bool expandDRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+ bool expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
void createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg,
bool Is64Bit, SmallVectorImpl<MCInst> &Instructions);
+ void createCpRestoreMemOp(bool IsLoad, int StackOffset, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions);
+
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
@@ -239,8 +254,11 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetMips0Directive();
bool parseSetArchDirective();
bool parseSetFeature(uint64_t Feature);
+ bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup.
bool parseDirectiveCpLoad(SMLoc Loc);
+ bool parseDirectiveCpRestore(SMLoc Loc);
bool parseDirectiveCPSetup();
+ bool parseDirectiveCPReturn();
bool parseDirectiveNaN();
bool parseDirectiveSet();
bool parseDirectiveOption();
@@ -337,6 +355,7 @@ class MipsAsmParser : public MCTargetAsmParser {
// FeatureMipsGP64 | FeatureMips1)
// Clearing Mips3 is equivalent to clear (FeatureMips3 | FeatureMips4).
void selectArch(StringRef ArchFeature) {
+ MCSubtargetInfo &STI = copySTI();
FeatureBitset FeatureBits = STI.getFeatureBits();
FeatureBits &= ~MipsAssemblerOptions::AllArchRelatedMask;
STI.setFeatureBits(FeatureBits);
@@ -346,7 +365,8 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (!(STI.getFeatureBits()[Feature])) {
+ if (!(getSTI().getFeatureBits()[Feature])) {
+ MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
@@ -354,7 +374,8 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (STI.getFeatureBits()[Feature]) {
+ if (getSTI().getFeatureBits()[Feature]) {
+ MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
AssemblerOptions.back()->setFeatures(STI.getFeatureBits());
@@ -363,26 +384,25 @@ class MipsAsmParser : public MCTargetAsmParser {
void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
setFeatureBits(Feature, FeatureString);
- AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits());
}
void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) {
clearFeatureBits(Feature, FeatureString);
- AssemblerOptions.front()->setFeatures(STI.getFeatureBits());
+ AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits());
}
public:
enum MipsMatchResultTy {
- Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY
+ Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "MipsGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
-
};
- MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ MipsAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti),
+ : MCTargetAsmParser(Options, sti),
ABI(MipsABIInfo::computeTargetABI(Triple(sti.getTargetTriple()),
sti.getCPU(), Options)) {
MCAsmParserExtension::Initialize(parser);
@@ -390,15 +410,15 @@ public:
parser.addAliasForDirective(".asciiz", ".asciz");
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
-
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
// Remember the initial assembler options. The user can not modify these.
AssemblerOptions.push_back(
- llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits()));
-
+ llvm::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
+
// Create an assembler options environment for the user to modify.
AssemblerOptions.push_back(
- llvm::make_unique<MipsAssemblerOptions>(STI.getFeatureBits()));
+ llvm::make_unique<MipsAssemblerOptions>(getSTI().getFeatureBits()));
getTargetStreamer().updateABIInfo(*this);
@@ -407,6 +427,12 @@ public:
CurrentFn = nullptr;
+ IsPicEnabled =
+ (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_);
+
+ IsCpRestoreSet = false;
+ CpRestoreOffset = -1;
+
Triple TheTriple(sti.getTargetTriple());
if ((TheTriple.getArch() == Triple::mips) ||
(TheTriple.getArch() == Triple::mips64))
@@ -418,70 +444,103 @@ public:
/// True if all of $fcc0 - $fcc7 exist for the current ISA.
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
- bool isGP64bit() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
- bool isFP64bit() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; }
+ bool isGP64bit() const {
+ return getSTI().getFeatureBits()[Mips::FeatureGP64Bit];
+ }
+ bool isFP64bit() const {
+ return getSTI().getFeatureBits()[Mips::FeatureFP64Bit];
+ }
const MipsABIInfo &getABI() const { return ABI; }
bool isABI_N32() const { return ABI.IsN32(); }
bool isABI_N64() const { return ABI.IsN64(); }
bool isABI_O32() const { return ABI.IsO32(); }
- bool isABI_FPXX() const { return STI.getFeatureBits()[Mips::FeatureFPXX]; }
+ bool isABI_FPXX() const {
+ return getSTI().getFeatureBits()[Mips::FeatureFPXX];
+ }
bool useOddSPReg() const {
- return !(STI.getFeatureBits()[Mips::FeatureNoOddSPReg]);
+ return !(getSTI().getFeatureBits()[Mips::FeatureNoOddSPReg]);
}
bool inMicroMipsMode() const {
- return STI.getFeatureBits()[Mips::FeatureMicroMips];
+ return getSTI().getFeatureBits()[Mips::FeatureMicroMips];
+ }
+ bool hasMips1() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips1];
+ }
+ bool hasMips2() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips2];
+ }
+ bool hasMips3() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips3];
+ }
+ bool hasMips4() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips4];
+ }
+ bool hasMips5() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMips5];
}
- bool hasMips1() const { return STI.getFeatureBits()[Mips::FeatureMips1]; }
- bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; }
- bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; }
- bool hasMips4() const { return STI.getFeatureBits()[Mips::FeatureMips4]; }
- bool hasMips5() const { return STI.getFeatureBits()[Mips::FeatureMips5]; }
bool hasMips32() const {
- return STI.getFeatureBits()[Mips::FeatureMips32];
+ return getSTI().getFeatureBits()[Mips::FeatureMips32];
}
bool hasMips64() const {
- return STI.getFeatureBits()[Mips::FeatureMips64];
+ return getSTI().getFeatureBits()[Mips::FeatureMips64];
}
bool hasMips32r2() const {
- return STI.getFeatureBits()[Mips::FeatureMips32r2];
+ return getSTI().getFeatureBits()[Mips::FeatureMips32r2];
}
bool hasMips64r2() const {
- return STI.getFeatureBits()[Mips::FeatureMips64r2];
+ return getSTI().getFeatureBits()[Mips::FeatureMips64r2];
}
bool hasMips32r3() const {
- return (STI.getFeatureBits()[Mips::FeatureMips32r3]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips32r3]);
}
bool hasMips64r3() const {
- return (STI.getFeatureBits()[Mips::FeatureMips64r3]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips64r3]);
}
bool hasMips32r5() const {
- return (STI.getFeatureBits()[Mips::FeatureMips32r5]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips32r5]);
}
bool hasMips64r5() const {
- return (STI.getFeatureBits()[Mips::FeatureMips64r5]);
+ return (getSTI().getFeatureBits()[Mips::FeatureMips64r5]);
}
bool hasMips32r6() const {
- return STI.getFeatureBits()[Mips::FeatureMips32r6];
+ return getSTI().getFeatureBits()[Mips::FeatureMips32r6];
}
bool hasMips64r6() const {
- return STI.getFeatureBits()[Mips::FeatureMips64r6];
+ return getSTI().getFeatureBits()[Mips::FeatureMips64r6];
}
- bool hasDSP() const { return STI.getFeatureBits()[Mips::FeatureDSP]; }
- bool hasDSPR2() const { return STI.getFeatureBits()[Mips::FeatureDSPR2]; }
- bool hasMSA() const { return STI.getFeatureBits()[Mips::FeatureMSA]; }
+ bool hasDSP() const {
+ return getSTI().getFeatureBits()[Mips::FeatureDSP];
+ }
+ bool hasDSPR2() const {
+ return getSTI().getFeatureBits()[Mips::FeatureDSPR2];
+ }
+ bool hasDSPR3() const {
+ return getSTI().getFeatureBits()[Mips::FeatureDSPR3];
+ }
+ bool hasMSA() const {
+ return getSTI().getFeatureBits()[Mips::FeatureMSA];
+ }
bool hasCnMips() const {
- return (STI.getFeatureBits()[Mips::FeatureCnMips]);
+ return (getSTI().getFeatureBits()[Mips::FeatureCnMips]);
+ }
+
+ bool inPicMode() {
+ return IsPicEnabled;
}
bool inMips16Mode() const {
- return STI.getFeatureBits()[Mips::FeatureMips16];
+ return getSTI().getFeatureBits()[Mips::FeatureMips16];
+ }
+
+ bool useTraps() const {
+ return getSTI().getFeatureBits()[Mips::FeatureUseTCCInDIV];
}
bool useSoftFloat() const {
- return STI.getFeatureBits()[Mips::FeatureSoftFloat];
+ return getSTI().getFeatureBits()[Mips::FeatureSoftFloat];
}
/// Warn if RegIndex is the same as the current AT.
@@ -869,6 +928,16 @@ public:
Inst.addOperand(MCOperand::createReg(getHWRegsReg()));
}
+ template <unsigned Bits, int Offset = 0, int AdjustOffset = 0>
+ void addConstantUImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ uint64_t Imm = getConstantImm() - Offset;
+ Imm &= (1 << Bits) - 1;
+ Imm += Offset;
+ Imm += AdjustOffset;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ }
+
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCExpr *Expr = getImm();
@@ -878,7 +947,9 @@ public:
void addMemOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR32Reg()));
+ Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit()
+ ? getMemBase()->getGPR64Reg()
+ : getMemBase()->getGPR32Reg()));
const MCExpr *Expr = getMemOff();
addExpr(Inst, Expr);
@@ -924,10 +995,16 @@ public:
bool isRegIdx() const { return Kind == k_RegisterIndex; }
bool isImm() const override { return Kind == k_Immediate; }
bool isConstantImm() const {
- return isImm() && dyn_cast<MCConstantExpr>(getImm());
+ return isImm() && isa<MCConstantExpr>(getImm());
+ }
+ bool isConstantImmz() const {
+ return isConstantImm() && getConstantImm() == 0;
}
- template <unsigned Bits> bool isUImm() const {
- return isImm() && isConstantImm() && isUInt<Bits>(getConstantImm());
+ template <unsigned Bits, int Offset = 0> bool isConstantUImm() const {
+ return isConstantImm() && isUInt<Bits>(getConstantImm() - Offset);
+ }
+ template <unsigned Bits> bool isConstantSImm() const {
+ return isConstantImm() && isInt<Bits>(getConstantImm());
}
bool isToken() const override {
// Note: It's not possible to pretend that other operand kinds are tokens.
@@ -936,10 +1013,15 @@ public:
}
bool isMem() const override { return Kind == k_Memory; }
bool isConstantMemOff() const {
- return isMem() && dyn_cast<MCConstantExpr>(getMemOff());
+ return isMem() && isa<MCConstantExpr>(getMemOff());
}
template <unsigned Bits> bool isMemWithSimmOffset() const {
- return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff());
+ return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff())
+ && getMemBase()->isGPRAsmReg();
+ }
+ template <unsigned Bits> bool isMemWithSimmOffsetGPR() const {
+ return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff()) &&
+ getMemBase()->isGPRAsmReg();
}
bool isMemWithGRPMM16Base() const {
return isMem() && getMemBase()->isMM16AsmReg();
@@ -953,13 +1035,23 @@ public:
&& (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx()
&& (getMemBase()->getGPR32Reg() == Mips::SP);
}
+ template <unsigned Bits, unsigned ShiftLeftAmount>
+ bool isScaledUImm() const {
+ return isConstantImm() &&
+ isShiftedUInt<Bits, ShiftLeftAmount>(getConstantImm());
+ }
bool isRegList16() const {
if (!isRegList())
return false;
int Size = RegList.List->size();
- if (Size < 2 || Size > 5 || *RegList.List->begin() != Mips::S0 ||
- RegList.List->back() != Mips::RA)
+ if (Size < 2 || Size > 5)
+ return false;
+
+ unsigned R0 = RegList.List->front();
+ unsigned R1 = RegList.List->back();
+ if (!((R0 == Mips::S0 && R1 == Mips::RA) ||
+ (R0 == Mips::S0_64 && R1 == Mips::RA_64)))
return false;
int PrevReg = *RegList.List->begin();
@@ -1304,9 +1396,123 @@ static bool hasShortDelaySlot(unsigned Opcode) {
}
}
+static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) {
+ if (const MCSymbolRefExpr *SRExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ return &SRExpr->getSymbol();
+ }
+
+ if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr)) {
+ const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS());
+ const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS());
+
+ if (LHSSym)
+ return LHSSym;
+
+ if (RHSSym)
+ return RHSSym;
+
+ return nullptr;
+ }
+
+ if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+ return getSingleMCSymbol(UExpr->getSubExpr());
+
+ return nullptr;
+}
+
+static unsigned countMCSymbolRefExpr(const MCExpr *Expr) {
+ if (isa<MCSymbolRefExpr>(Expr))
+ return 1;
+
+ if (const MCBinaryExpr *BExpr = dyn_cast<MCBinaryExpr>(Expr))
+ return countMCSymbolRefExpr(BExpr->getLHS()) +
+ countMCSymbolRefExpr(BExpr->getRHS());
+
+ if (const MCUnaryExpr *UExpr = dyn_cast<MCUnaryExpr>(Expr))
+ return countMCSymbolRefExpr(UExpr->getSubExpr());
+
+ return 0;
+}
+
+namespace {
+void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(Reg0));
+ tmpInst.addOperand(Op1);
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, Instructions);
+}
+
+void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, Instructions);
+}
+
+void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createImm(Imm1));
+ tmpInst.addOperand(MCOperand::createImm(Imm2));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(Reg0));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ tmpInst.setOpcode(Opcode);
+ tmpInst.addOperand(MCOperand::createReg(Reg0));
+ tmpInst.addOperand(MCOperand::createReg(Reg1));
+ tmpInst.addOperand(Op2);
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+
+void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc,
+ Instructions);
+}
+
+void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc,
+ Instructions);
+}
+
+void emitAppropriateDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount,
+ SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
+ if (ShiftAmount >= 32) {
+ emitRRI(Mips::DSLL32, DstReg, SrcReg, ShiftAmount - 32, IDLoc,
+ Instructions);
+ return;
+ }
+
+ emitRRI(Mips::DSLL, DstReg, SrcReg, ShiftAmount, IDLoc, Instructions);
+}
+} // end anonymous namespace.
+
bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ bool ExpandedJalSym = false;
Inst.setLoc(IDLoc);
@@ -1365,12 +1571,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BEQZ16_MM:
+ case Mips::BEQZC16_MMR6:
case Mips::BNEZ16_MM:
+ case Mips::BNEZC16_MMR6:
assert(MCID.getNumOperands() == 2 && "unexpected number of operands");
Offset = Inst.getOperand(1);
if (!Offset.isImm())
break; // We'll deal with this situation later on when applying fixups.
- if (!isIntN(8, Offset.getImm()))
+ if (!isInt<8>(Offset.getImm()))
return Error(IDLoc, "branch target out of range");
if (OffsetToAlignment(Offset.getImm(), 2LL))
return Error(IDLoc, "branch to misaligned address");
@@ -1415,32 +1623,6 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
break;
- case Mips::CINS:
- case Mips::CINS32:
- case Mips::EXTS:
- case Mips::EXTS32:
- assert(MCID.getNumOperands() == 4 && "unexpected number of operands");
- // Check length
- Opnd = Inst.getOperand(3);
- if (!Opnd.isImm())
- return Error(IDLoc, "expected immediate operand kind");
- Imm = Opnd.getImm();
- if (Imm < 0 || Imm > 31)
- return Error(IDLoc, "immediate operand value out of range");
- // Check position
- Opnd = Inst.getOperand(2);
- if (!Opnd.isImm())
- return Error(IDLoc, "expected immediate operand kind");
- Imm = Opnd.getImm();
- if (Imm < 0 || Imm > (Opcode == Mips::CINS ||
- Opcode == Mips::EXTS ? 63 : 31))
- return Error(IDLoc, "immediate operand value out of range");
- if (Imm > 31) {
- Inst.setOpcode(Opcode == Mips::CINS ? Mips::CINS32 : Mips::EXTS32);
- Inst.getOperand(2).setImm(Imm - 32);
- }
- break;
-
case Mips::SEQi:
case Mips::SNEi:
assert(MCID.getNumOperands() == 3 && "unexpected number of operands");
@@ -1454,6 +1636,81 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
+ // This expansion is not in a function called by tryExpandInstruction()
+ // because the pseudo-instruction doesn't have a distinct opcode.
+ if ((Inst.getOpcode() == Mips::JAL || Inst.getOpcode() == Mips::JAL_MM) &&
+ inPicMode()) {
+ warnIfNoMacro(IDLoc);
+
+ const MCExpr *JalExpr = Inst.getOperand(0).getExpr();
+
+ // We can do this expansion if there's only 1 symbol in the argument
+ // expression.
+ if (countMCSymbolRefExpr(JalExpr) > 1)
+ return Error(IDLoc, "jal doesn't support multiple symbols in PIC mode");
+
+ // FIXME: This checks that the expression can be handled by the later stages
+ // of the assembler. We ought to leave it to those later stages, but
+ // we can't do that until evaluateRelocExpr() stops rewriting the
+ // expressions into non-equivalent forms.
+ const MCSymbol *JalSym = getSingleMCSymbol(JalExpr);
+
+ // FIXME: Add support for label+offset operands (currently causes an error).
+ // FIXME: Add support for forward-declared local symbols.
+ // FIXME: Add expansion for when the LargeGOT option is enabled.
+ if (JalSym->isInSection() || JalSym->isTemporary()) {
+ if (isABI_O32()) {
+ // If it's a local symbol and the O32 ABI is being used, we expand to:
+ // lw $25, 0($gp)
+ // R_(MICRO)MIPS_GOT16 label
+ // addiu $25, $25, 0
+ // R_(MICRO)MIPS_LO16 label
+ // jalr $25
+ const MCExpr *Got16RelocExpr = evaluateRelocExpr(JalExpr, "got");
+ const MCExpr *Lo16RelocExpr = evaluateRelocExpr(JalExpr, "lo");
+
+ emitRRX(Mips::LW, Mips::T9, Mips::GP,
+ MCOperand::createExpr(Got16RelocExpr), IDLoc, Instructions);
+ emitRRX(Mips::ADDiu, Mips::T9, Mips::T9,
+ MCOperand::createExpr(Lo16RelocExpr), IDLoc, Instructions);
+ } else if (isABI_N32() || isABI_N64()) {
+ // If it's a local symbol and the N32/N64 ABIs are being used,
+ // we expand to:
+ // lw/ld $25, 0($gp)
+ // R_(MICRO)MIPS_GOT_DISP label
+ // jalr $25
+ const MCExpr *GotDispRelocExpr = evaluateRelocExpr(JalExpr, "got_disp");
+
+ emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP,
+ MCOperand::createExpr(GotDispRelocExpr), IDLoc, Instructions);
+ }
+ } else {
+ // If it's an external/weak symbol, we expand to:
+ // lw/ld $25, 0($gp)
+ // R_(MICRO)MIPS_CALL16 label
+ // jalr $25
+ const MCExpr *Call16RelocExpr = evaluateRelocExpr(JalExpr, "call16");
+
+ emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP,
+ MCOperand::createExpr(Call16RelocExpr), IDLoc, Instructions);
+ }
+
+ MCInst JalrInst;
+ if (IsCpRestoreSet && inMicroMipsMode())
+ JalrInst.setOpcode(Mips::JALRS_MM);
+ else
+ JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
+ JalrInst.addOperand(MCOperand::createReg(Mips::RA));
+ JalrInst.addOperand(MCOperand::createReg(Mips::T9));
+
+ // FIXME: Add an R_(MICRO)MIPS_JALR relocation after the JALR.
+ // This relocation is supposed to be an optimization hint for the linker
+ // and is not necessary for correctness.
+
+ Inst = JalrInst;
+ ExpandedJalSym = true;
+ }
+
if (MCID.mayLoad() || MCID.mayStore()) {
// Check the offset of memory operand, if it is a symbol
// reference or immediate we may have to expand instructions.
@@ -1500,17 +1757,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
int MemOffset = Op.getImm();
MCOperand &DstReg = Inst.getOperand(0);
MCOperand &BaseReg = Inst.getOperand(1);
- if (isIntN(9, MemOffset) && (MemOffset % 4 == 0) &&
+ if (isInt<9>(MemOffset) && (MemOffset % 4 == 0) &&
getContext().getRegisterInfo()->getRegClass(
Mips::GPRMM16RegClassID).contains(DstReg.getReg()) &&
- BaseReg.getReg() == Mips::GP) {
- MCInst TmpInst;
- TmpInst.setLoc(IDLoc);
- TmpInst.setOpcode(Mips::LWGP_MM);
- TmpInst.addOperand(MCOperand::createReg(DstReg.getReg()));
- TmpInst.addOperand(MCOperand::createReg(Mips::GP));
- TmpInst.addOperand(MCOperand::createImm(MemOffset));
- Instructions.push_back(TmpInst);
+ (BaseReg.getReg() == Mips::GP ||
+ BaseReg.getReg() == Mips::GP_64)) {
+
+ emitRRI(Mips::LWGP_MM, DstReg.getReg(), Mips::GP, MemOffset,
+ IDLoc, Instructions);
return false;
}
}
@@ -1597,7 +1851,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (Imm < -1 || Imm > 14)
return Error(IDLoc, "immediate operand value out of range");
break;
+ case Mips::TEQ_MM:
+ case Mips::TGE_MM:
+ case Mips::TGEU_MM:
+ case Mips::TLT_MM:
+ case Mips::TLTU_MM:
+ case Mips::TNE_MM:
case Mips::SB16_MM:
+ case Mips::SB16_MMR6:
Opnd = Inst.getOperand(2);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
@@ -1607,6 +1868,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break;
case Mips::LHU16_MM:
case Mips::SH16_MM:
+ case Mips::SH16_MMR6:
Opnd = Inst.getOperand(2);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
@@ -1616,6 +1878,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
break;
case Mips::LW16_MM:
case Mips::SW16_MM:
+ case Mips::SW16_MMR6:
Opnd = Inst.getOperand(2);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
@@ -1623,93 +1886,111 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (Imm < 0 || Imm > 60 || (Imm % 4 != 0))
return Error(IDLoc, "immediate operand value out of range");
break;
- case Mips::CACHE:
- case Mips::PREF:
- Opnd = Inst.getOperand(2);
- if (!Opnd.isImm())
- return Error(IDLoc, "expected immediate operand kind");
- Imm = Opnd.getImm();
- if (!isUInt<5>(Imm))
- return Error(IDLoc, "immediate operand value out of range");
- break;
case Mips::ADDIUPC_MM:
MCOperand Opnd = Inst.getOperand(1);
if (!Opnd.isImm())
return Error(IDLoc, "expected immediate operand kind");
int Imm = Opnd.getImm();
- if ((Imm % 4 != 0) || !isIntN(25, Imm))
+ if ((Imm % 4 != 0) || !isInt<25>(Imm))
return Error(IDLoc, "immediate operand value out of range");
break;
}
}
- if (needsExpansion(Inst)) {
- if (expandInstruction(Inst, IDLoc, Instructions))
- return true;
- } else
+ MacroExpanderResultTy ExpandResult =
+ tryExpandInstruction(Inst, IDLoc, Instructions);
+ switch (ExpandResult) {
+ case MER_NotAMacro:
Instructions.push_back(Inst);
+ break;
+ case MER_Success:
+ break;
+ case MER_Fail:
+ return true;
+ }
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);
- return false;
-}
+ if ((Inst.getOpcode() == Mips::JalOneReg ||
+ Inst.getOpcode() == Mips::JalTwoReg || ExpandedJalSym) &&
+ isPicAndNotNxxAbi()) {
+ if (IsCpRestoreSet) {
+ // We need a NOP between the JALR and the LW:
+ // If .set reorder has been used, we've already emitted a NOP.
+ // If .set noreorder has been used, we need to emit a NOP at this point.
+ if (!AssemblerOptions.back()->isReorder())
+ createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions);
-bool MipsAsmParser::needsExpansion(MCInst &Inst) {
+ // Load the $gp from the stack.
+ SmallVector<MCInst, 3> LoadInsts;
+ createCpRestoreMemOp(true /*IsLoad*/, CpRestoreOffset /*StackOffset*/,
+ IDLoc, LoadInsts);
- switch (Inst.getOpcode()) {
- case Mips::LoadImm32:
- case Mips::LoadImm64:
- case Mips::LoadAddrImm32:
- case Mips::LoadAddrReg32:
- case Mips::B_MM_Pseudo:
- case Mips::LWM_MM:
- case Mips::SWM_MM:
- case Mips::JalOneReg:
- case Mips::JalTwoReg:
- case Mips::BneImm:
- case Mips::BeqImm:
- case Mips::BLT:
- case Mips::BLE:
- case Mips::BGE:
- case Mips::BGT:
- case Mips::BLTU:
- case Mips::BLEU:
- case Mips::BGEU:
- case Mips::BGTU:
- case Mips::Ulhu:
- case Mips::Ulw:
- return true;
- default:
- return false;
+ for (const MCInst &Inst : LoadInsts)
+ Instructions.push_back(Inst);
+
+ } else
+ Warning(IDLoc, "no .cprestore used in PIC mode");
}
+
+ return false;
}
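
The ordering that falls out of the PIC jal handling above is easier to see end to end on one example. Below is a sketch, written as C++-style comments, of what an O32 PIC call to an external symbol would roughly become under ".set noreorder" when a .cprestore offset is in effect; the symbol name and the offset 16 are purely illustrative and not taken from assembler output:

// Source:   jal foo            (.set noreorder, .cprestore 16, O32 PIC mode)
// Emitted:  lw   $25, %call16(foo)($gp)   // R_MIPS_CALL16 against foo
//           jalr $25                      // the rewritten JAL
//           nop                           // delay-slot NOP; .set reorder is off
//           lw   $gp, 16($sp)             // $gp reload via createCpRestoreMemOp()
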
-bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+MipsAsmParser::MacroExpanderResultTy
+MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
- default: llvm_unreachable("unimplemented expansion");
+ default:
+ return MER_NotAMacro;
case Mips::LoadImm32:
- return expandLoadImm(Inst, true, IDLoc, Instructions);
+ return expandLoadImm(Inst, true, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::LoadImm64:
- return expandLoadImm(Inst, false, IDLoc, Instructions);
+ return expandLoadImm(Inst, false, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::LoadAddrImm32:
- return expandLoadAddressImm(Inst, true, IDLoc, Instructions);
+ case Mips::LoadAddrImm64:
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ assert((Inst.getOperand(1).isImm() || Inst.getOperand(1).isExpr()) &&
+ "expected immediate operand kind");
+
+ return expandLoadAddress(Inst.getOperand(0).getReg(), Mips::NoRegister,
+ Inst.getOperand(1),
+ Inst.getOpcode() == Mips::LoadAddrImm32, IDLoc,
+ Instructions)
+ ? MER_Fail
+ : MER_Success;
case Mips::LoadAddrReg32:
- return expandLoadAddressReg(Inst, true, IDLoc, Instructions);
+ case Mips::LoadAddrReg64:
+ assert(Inst.getOperand(0).isReg() && "expected register operand kind");
+ assert(Inst.getOperand(1).isReg() && "expected register operand kind");
+ assert((Inst.getOperand(2).isImm() || Inst.getOperand(2).isExpr()) &&
+ "expected immediate operand kind");
+
+ return expandLoadAddress(Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg(), Inst.getOperand(2),
+ Inst.getOpcode() == Mips::LoadAddrReg32, IDLoc,
+ Instructions)
+ ? MER_Fail
+ : MER_Success;
case Mips::B_MM_Pseudo:
- return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions);
+ case Mips::B_MMR6_Pseudo:
+ return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::SWM_MM:
case Mips::LWM_MM:
- return expandLoadStoreMultiple(Inst, IDLoc, Instructions);
+ return expandLoadStoreMultiple(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::JalOneReg:
case Mips::JalTwoReg:
- return expandJalWithRegs(Inst, IDLoc, Instructions);
+ return expandJalWithRegs(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
case Mips::BneImm:
case Mips::BeqImm:
- return expandBranchImm(Inst, IDLoc, Instructions);
+ return expandBranchImm(Inst, IDLoc, Instructions) ? MER_Fail : MER_Success;
case Mips::BLT:
case Mips::BLE:
case Mips::BGE:
@@ -1718,78 +1999,97 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
case Mips::BLEU:
case Mips::BGEU:
case Mips::BGTU:
- return expandCondBranches(Inst, IDLoc, Instructions);
+ case Mips::BLTL:
+ case Mips::BLEL:
+ case Mips::BGEL:
+ case Mips::BGTL:
+ case Mips::BLTUL:
+ case Mips::BLEUL:
+ case Mips::BGEUL:
+ case Mips::BGTUL:
+ case Mips::BLTImmMacro:
+ case Mips::BLEImmMacro:
+ case Mips::BGEImmMacro:
+ case Mips::BGTImmMacro:
+ case Mips::BLTUImmMacro:
+ case Mips::BLEUImmMacro:
+ case Mips::BGEUImmMacro:
+ case Mips::BGTUImmMacro:
+ case Mips::BLTLImmMacro:
+ case Mips::BLELImmMacro:
+ case Mips::BGELImmMacro:
+ case Mips::BGTLImmMacro:
+ case Mips::BLTULImmMacro:
+ case Mips::BLEULImmMacro:
+ case Mips::BGEULImmMacro:
+ case Mips::BGTULImmMacro:
+ return expandCondBranches(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::SDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, false, true) ? MER_Fail
+ : MER_Success;
+ case Mips::DSDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, true, true) ? MER_Fail
+ : MER_Success;
+ case Mips::UDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, false, false) ? MER_Fail
+ : MER_Success;
+ case Mips::DUDivMacro:
+ return expandDiv(Inst, IDLoc, Instructions, true, false) ? MER_Fail
+ : MER_Success;
+ case Mips::Ulh:
+ return expandUlh(Inst, true, IDLoc, Instructions) ? MER_Fail : MER_Success;
case Mips::Ulhu:
- return expandUlhu(Inst, IDLoc, Instructions);
+ return expandUlh(Inst, false, IDLoc, Instructions) ? MER_Fail : MER_Success;
case Mips::Ulw:
- return expandUlw(Inst, IDLoc, Instructions);
+ return expandUlw(Inst, IDLoc, Instructions) ? MER_Fail : MER_Success;
+ case Mips::NORImm:
+ return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::ADDi:
+ case Mips::ADDiu:
+ case Mips::SLTi:
+ case Mips::SLTiu:
+ if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+ if (isInt<16>(ImmValue))
+ return MER_NotAMacro;
+ return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ }
+ return MER_NotAMacro;
+ case Mips::ANDi:
+ case Mips::ORi:
+ case Mips::XORi:
+ if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+ if (isUInt<16>(ImmValue))
+ return MER_NotAMacro;
+ return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ }
+ return MER_NotAMacro;
+ case Mips::ROL:
+ case Mips::ROR:
+ return expandRotation(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::ROLImm:
+ case Mips::RORImm:
+ return expandRotationImm(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::DROL:
+ case Mips::DROR:
+ return expandDRotation(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
+ case Mips::DROLImm:
+ case Mips::DRORImm:
+ return expandDRotationImm(Inst, IDLoc, Instructions) ? MER_Fail
+ : MER_Success;
}
}
-namespace {
-void emitRX(unsigned Opcode, unsigned DstReg, MCOperand Imm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- MCInst tmpInst;
- tmpInst.setOpcode(Opcode);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
- tmpInst.addOperand(Imm);
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
-}
-
-void emitRI(unsigned Opcode, unsigned DstReg, int16_t Imm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- emitRX(Opcode, DstReg, MCOperand::createImm(Imm), IDLoc, Instructions);
-}
-
-
-void emitRRX(unsigned Opcode, unsigned DstReg, unsigned SrcReg, MCOperand Imm,
- SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
- MCInst tmpInst;
- tmpInst.setOpcode(Opcode);
- tmpInst.addOperand(MCOperand::createReg(DstReg));
- tmpInst.addOperand(MCOperand::createReg(SrcReg));
- tmpInst.addOperand(Imm);
- tmpInst.setLoc(IDLoc);
- Instructions.push_back(tmpInst);
-}
-
-void emitRRR(unsigned Opcode, unsigned DstReg, unsigned SrcReg,
- unsigned SrcReg2, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- emitRRX(Opcode, DstReg, SrcReg, MCOperand::createReg(SrcReg2), IDLoc,
- Instructions);
-}
-
-void emitRRI(unsigned Opcode, unsigned DstReg, unsigned SrcReg, int16_t Imm,
- SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
- emitRRX(Opcode, DstReg, SrcReg, MCOperand::createImm(Imm), IDLoc,
- Instructions);
-}
-
-template <int16_t ShiftAmount>
-void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- if (ShiftAmount >= 32)
- emitRRI(Mips::DSLL32, RegNo, RegNo, ShiftAmount - 32, IDLoc, Instructions);
- else if (ShiftAmount > 0)
- emitRRI(Mips::DSLL, RegNo, RegNo, ShiftAmount, IDLoc, Instructions);
-
- // There's no need for an ORi if the immediate is 0.
- if (Operand.isImm() && Operand.getImm() == 0)
- return;
-
- emitRRX(Mips::ORi, RegNo, RegNo, Operand, IDLoc, Instructions);
-}
-
-template <unsigned ShiftAmount>
-void createLShiftOri(int64_t Value, unsigned RegNo, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- createLShiftOri<ShiftAmount>(MCOperand::createImm(Value), RegNo, IDLoc,
- Instructions);
-}
-}
-
bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
// Create a JALR instruction which is going to replace the pseudo-JAL.
@@ -1800,8 +2100,11 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
if (Opcode == Mips::JalOneReg) {
// jal $rs => jalr $rs
- if (inMicroMipsMode()) {
- JalrInst.setOpcode(Mips::JALR16_MM);
+ if (IsCpRestoreSet && inMicroMipsMode()) {
+ JalrInst.setOpcode(Mips::JALRS16_MM);
+ JalrInst.addOperand(FirstRegOp);
+ } else if (inMicroMipsMode()) {
+ JalrInst.setOpcode(hasMips32r6() ? Mips::JALRC16_MMR6 : Mips::JALR16_MM);
JalrInst.addOperand(FirstRegOp);
} else {
JalrInst.setOpcode(Mips::JALR);
@@ -1810,30 +2113,47 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
}
} else if (Opcode == Mips::JalTwoReg) {
// jal $rd, $rs => jalr $rd, $rs
- JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
+ if (IsCpRestoreSet && inMicroMipsMode())
+ JalrInst.setOpcode(Mips::JALRS_MM);
+ else
+ JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR);
JalrInst.addOperand(FirstRegOp);
const MCOperand SecondRegOp = Inst.getOperand(1);
JalrInst.addOperand(SecondRegOp);
}
Instructions.push_back(JalrInst);
- // If .set reorder is active, emit a NOP after it.
- if (AssemblerOptions.back()->isReorder()) {
- // This is a 32-bit NOP because these 2 pseudo-instructions
- // do not have a short delay slot.
- MCInst NopInst;
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createImm(0));
- Instructions.push_back(NopInst);
+ // If .set reorder is active and branch instruction has a delay slot,
+ // emit a NOP after it.
+ const MCInstrDesc &MCID = getInstDesc(JalrInst.getOpcode());
+ if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
+ createNop(hasShortDelaySlot(JalrInst.getOpcode()), IDLoc, Instructions);
}
return false;
}
+/// Can the value be represented by an unsigned N-bit value and a shift left?
+template <unsigned N> static bool isShiftedUIntAtAnyPosition(uint64_t x) {
+ unsigned BitNum = findFirstSet(x);
+
+ return (x == x >> BitNum << BitNum) && isUInt<N>(x >> BitNum);
+}
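
A small standalone program makes the predicate above easy to exercise in isolation. This is only a sketch: it re-implements the same check with __builtin_ctzll instead of llvm::findFirstSet, hard-codes N=16, and guards the zero case that the real template leaves to its callers.

#include <cassert>
#include <cstdint>

// Re-implementation of isShiftedUIntAtAnyPosition<16> for demonstration:
// true when the value is some 16-bit quantity shifted left by its count of
// trailing zero bits, i.e. loadable with one ori followed by one shift.
static bool isShiftedUInt16AtAnyPosition(uint64_t X) {
  if (X == 0)
    return false; // the real helper is only reached for non-zero immediates
  unsigned BitNum = __builtin_ctzll(X); // position of the lowest set bit
  return (X == (X >> BitNum) << BitNum) && (X >> BitNum) <= UINT64_C(0xffff);
}

int main() {
  assert(isShiftedUInt16AtAnyPosition(0x0000ffff00000000ULL)); // 0xffff << 32
  assert(isShiftedUInt16AtAnyPosition(0x0000000000012340ULL)); // 0x1234 << 4
  assert(!isShiftedUInt16AtAnyPosition(0x0001000000000001ULL)); // set bits span > 16 bits
  return 0;
}
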
+
+/// Load (or add) an immediate into a register.
+///
+/// @param ImmValue The immediate to load.
+/// @param DstReg The register that will hold the immediate.
+/// @param SrcReg A register to add to the immediate or Mips::NoRegister
+/// for a simple initialization.
+/// @param Is32BitImm Is ImmValue 32-bit or 64-bit?
+/// @param IsAddress True if the immediate represents an address. False if it
+/// is an integer.
+/// @param IDLoc Location of the immediate in the source file.
+/// @param Instructions The instructions emitted by this expansion.
bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
- unsigned SrcReg, bool Is32BitImm, SMLoc IDLoc,
+ unsigned SrcReg, bool Is32BitImm,
+ bool IsAddress, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
if (!Is32BitImm && !isGP64bit()) {
Error(IDLoc, "instruction requires a 64-bit architecture");
@@ -1852,6 +2172,9 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
}
}
+ unsigned ZeroReg = IsAddress ? ABI.GetNullPtr() : ABI.GetZeroReg();
+ unsigned AdduOp = !Is32BitImm ? Mips::DADDu : Mips::ADDu;
+
bool UseSrcReg = false;
if (SrcReg != Mips::NoRegister)
UseSrcReg = true;
@@ -1866,111 +2189,129 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
TmpReg = ATReg;
}
- // FIXME: gas has a special case for values that are 000...1111, which
- // becomes a li -1 and then a dsrl
if (isInt<16>(ImmValue)) {
- // li d,j => addiu d,$zero,j
if (!UseSrcReg)
- SrcReg = Mips::ZERO;
+ SrcReg = ZeroReg;
+
+ // This doesn't quite follow the usual ABI expectations for N32 but matches
+ // traditional assembler behaviour. N32 would normally use addiu for both
+ // integers and addresses.
+ if (IsAddress && !Is32BitImm) {
+ emitRRI(Mips::DADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions);
+ return false;
+ }
+
emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions);
- } else if (isUInt<16>(ImmValue)) {
- // li d,j => ori d,$zero,j
+ return false;
+ }
+
+ if (isUInt<16>(ImmValue)) {
unsigned TmpReg = DstReg;
if (SrcReg == DstReg) {
- unsigned ATReg = getATReg(IDLoc);
- if (!ATReg)
+ TmpReg = getATReg(IDLoc);
+ if (!TmpReg)
return true;
- TmpReg = ATReg;
}
- emitRRI(Mips::ORi, TmpReg, Mips::ZERO, ImmValue, IDLoc, Instructions);
+ emitRRI(Mips::ORi, TmpReg, ZeroReg, ImmValue, IDLoc, Instructions);
if (UseSrcReg)
- emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
- } else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) {
+ emitRRR(ABI.GetPtrAdduOp(), DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
+
+ if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) {
warnIfNoMacro(IDLoc);
- // For all other values which are representable as a 32-bit integer:
- // li d,j => lui d,hi16(j)
- // ori d,d,lo16(j)
uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
uint16_t Bits15To0 = ImmValue & 0xffff;
if (!Is32BitImm && !isInt<32>(ImmValue)) {
- // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the
+ // Traditional behaviour seems to special case this particular value. It's
+ // not clear why other masks are handled differently.
+ if (ImmValue == 0xffffffff) {
+ emitRI(Mips::LUi, TmpReg, 0xffff, IDLoc, Instructions);
+ emitRRI(Mips::DSRL32, TmpReg, TmpReg, 0, IDLoc, Instructions);
+ if (UseSrcReg)
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
+
+ // Expand to an ORi instead of a LUi to avoid sign-extending into the
// upper 32 bits.
- emitRRI(Mips::ORi, TmpReg, Mips::ZERO, Bits31To16, IDLoc, Instructions);
+ emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits31To16, IDLoc, Instructions);
emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, Instructions);
- } else
- emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions);
- createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions);
+ if (Bits15To0)
+ emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions);
+ if (UseSrcReg)
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
+ emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions);
+ if (Bits15To0)
+ emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions);
if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
-
- } else if ((ImmValue & (0xffffLL << 48)) == 0) {
- warnIfNoMacro(IDLoc);
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ return false;
+ }
- // <------- lo32 ------>
- // <------- hi32 ------>
- // <- hi16 -> <- lo16 ->
- // _________________________________
- // | | | |
- // | 16-bits | 16-bits | 16-bits |
- // |__________|__________|__________|
- //
- // For any 64-bit value that is representable as a 48-bit integer:
- // li d,j => lui d,hi16(j)
- // ori d,d,hi16(lo32(j))
- // dsll d,d,16
- // ori d,d,lo16(lo32(j))
- uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff;
- uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
- uint16_t Bits15To0 = ImmValue & 0xffff;
+ if (isShiftedUIntAtAnyPosition<16>(ImmValue)) {
+ if (Is32BitImm) {
+ Error(IDLoc, "instruction requires a 32-bit immediate");
+ return true;
+ }
- emitRI(Mips::LUi, TmpReg, Bits47To32, IDLoc, Instructions);
- createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions);
- createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
+ // Traditionally, these immediates are shifted as little as possible and as
+ // such we align the most significant bit to bit 15 of our temporary.
+ unsigned FirstSet = findFirstSet((uint64_t)ImmValue);
+ unsigned LastSet = findLastSet((uint64_t)ImmValue);
+ unsigned ShiftAmount = FirstSet - (15 - (LastSet - FirstSet));
+ uint16_t Bits = (ImmValue >> ShiftAmount) & 0xffff;
+ emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits, IDLoc, Instructions);
+ emitRRI(Mips::DSLL, TmpReg, TmpReg, ShiftAmount, IDLoc, Instructions);
if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
- } else {
- warnIfNoMacro(IDLoc);
+ return false;
+ }
- // <------- hi32 ------> <------- lo32 ------>
- // <- hi16 -> <- lo16 ->
- // ___________________________________________
- // | | | | |
- // | 16-bits | 16-bits | 16-bits | 16-bits |
- // |__________|__________|__________|__________|
- //
- // For all other values which are representable as a 64-bit integer:
- // li d,j => lui d,hi16(j)
- // ori d,d,lo16(hi32(j))
- // dsll d,d,16
- // ori d,d,hi16(lo32(j))
- // dsll d,d,16
- // ori d,d,lo16(lo32(j))
- uint16_t Bits63To48 = (ImmValue >> 48) & 0xffff;
- uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff;
- uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
- uint16_t Bits15To0 = ImmValue & 0xffff;
+ warnIfNoMacro(IDLoc);
- emitRI(Mips::LUi, TmpReg, Bits63To48, IDLoc, Instructions);
- createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions);
+ // The remaining case is emitted as a sequence of dsll and ori, omitting
+ // zero chunks and coalescing any neighbouring dsll instructions.
+ // The highest 32 bits are equivalent to a 32-bit immediate load.
- // When Bits31To16 is 0, do a left shift of 32 bits instead of doing
- // two left shifts of 16 bits.
- if (Bits31To16 == 0) {
- createLShiftOri<32>(Bits15To0, TmpReg, IDLoc, Instructions);
- } else {
- createLShiftOri<16>(Bits31To16, TmpReg, IDLoc, Instructions);
- createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions);
+ // Load bits 32-63 of ImmValue into bits 0-31 of the temporary register.
+ if (loadImmediate(ImmValue >> 32, TmpReg, Mips::NoRegister, true, false,
+ IDLoc, Instructions))
+ return false;
+
+ // Shift and accumulate into the register. If a 16-bit chunk is zero, then
+ // skip it and defer the shift to the next chunk.
+ unsigned ShiftCarriedForwards = 16;
+ for (int BitNum = 16; BitNum >= 0; BitNum -= 16) {
+ uint16_t ImmChunk = (ImmValue >> BitNum) & 0xffff;
+
+ if (ImmChunk != 0) {
+ emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc,
+ Instructions);
+ emitRRI(Mips::ORi, TmpReg, TmpReg, ImmChunk, IDLoc, Instructions);
+ ShiftCarriedForwards = 0;
}
- if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions);
+ ShiftCarriedForwards += 16;
}
+ ShiftCarriedForwards -= 16;
+
+ // Finish any remaining shifts left by trailing zeros.
+ if (ShiftCarriedForwards)
+ emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc,
+ Instructions);
+
+ if (UseSrcReg)
+ emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+
return false;
}
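
For readers following the 64-bit path, the chunk/shift bookkeeping above is easier to see on a concrete value. The sketch below mirrors that loop and prints the sequence that would be requested through the emit helpers. It is a simplification under stated assumptions: "$at" is just a placeholder register, the sample immediate is hypothetical, the upper 32 bits are assumed to take the lui/ori form (the real code reuses loadImmediate for them), and the dsll/dsll32 split follows the removed createLShiftOri above, which emitAppropriateDSLL presumably matches.

#include <cstdint>
#include <cstdio>

// Print a left shift, using dsll32 for amounts of 32 or more, mirroring the
// removed createLShiftOri helper.
static void printShift(unsigned Amount) {
  if (Amount >= 32)
    std::printf("dsll32 $at, $at, %u\n", Amount - 32);
  else
    std::printf("dsll   $at, $at, %u\n", Amount);
}

int main() {
  uint64_t Imm = 0x0012003400000078ULL; // hypothetical 64-bit immediate

  // Bits 32-63 via the 32-bit path: lui plus an optional ori.
  std::printf("lui    $at, 0x%04x\n", unsigned(Imm >> 48) & 0xffff);
  if (unsigned Chunk = unsigned(Imm >> 32) & 0xffff)
    std::printf("ori    $at, $at, 0x%04x\n", Chunk);

  // Walk the low two 16-bit chunks, deferring shifts across zero chunks.
  unsigned ShiftCarriedForwards = 16;
  for (int BitNum = 16; BitNum >= 0; BitNum -= 16) {
    unsigned Chunk = unsigned(Imm >> BitNum) & 0xffff;
    if (Chunk != 0) {
      printShift(ShiftCarriedForwards);
      std::printf("ori    $at, $at, 0x%04x\n", Chunk);
      ShiftCarriedForwards = 0;
    }
    ShiftCarriedForwards += 16;
  }
  ShiftCarriedForwards -= 16;

  // Finish any shift still owed for trailing zero chunks.
  if (ShiftCarriedForwards)
    printShift(ShiftCarriedForwards);
  return 0;
}
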
@@ -1982,63 +2323,38 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
assert(DstRegOp.isReg() && "expected register operand kind");
if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
- Is32BitImm, IDLoc, Instructions))
+ Is32BitImm, false, IDLoc, Instructions))
return true;
return false;
}
-bool
-MipsAsmParser::expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- const MCOperand &DstRegOp = Inst.getOperand(0);
- assert(DstRegOp.isReg() && "expected register operand kind");
-
- const MCOperand &SrcRegOp = Inst.getOperand(1);
- assert(SrcRegOp.isReg() && "expected register operand kind");
-
- const MCOperand &ImmOp = Inst.getOperand(2);
- assert((ImmOp.isImm() || ImmOp.isExpr()) &&
- "expected immediate operand kind");
- if (!ImmOp.isImm()) {
- if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
- SrcRegOp.getReg(), Is32BitImm, IDLoc,
- Instructions))
- return true;
-
- return false;
- }
-
- if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), SrcRegOp.getReg(),
- Is32BitImm, IDLoc, Instructions))
+bool MipsAsmParser::expandLoadAddress(unsigned DstReg, unsigned BaseReg,
+ const MCOperand &Offset,
+ bool Is32BitAddress, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ // la can't produce a usable address when addresses are 64-bit.
+ if (Is32BitAddress && ABI.ArePtrs64bit()) {
+ // FIXME: Demote this to a warning and continue as if we had 'dla' instead.
+ // We currently can't do this because we depend on the equality
+ // operator and N64 can end up with a GPR32/GPR64 mismatch.
+ Error(IDLoc, "la used to load 64-bit address");
+ // Continue as if we had 'dla' instead.
+ Is32BitAddress = false;
+ }
+
+ // dla requires 64-bit addresses.
+ if (!Is32BitAddress && !ABI.ArePtrs64bit()) {
+ Error(IDLoc, "instruction requires a 64-bit architecture");
return true;
-
- return false;
-}
-
-bool
-MipsAsmParser::expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
- const MCOperand &DstRegOp = Inst.getOperand(0);
- assert(DstRegOp.isReg() && "expected register operand kind");
-
- const MCOperand &ImmOp = Inst.getOperand(1);
- assert((ImmOp.isImm() || ImmOp.isExpr()) &&
- "expected immediate operand kind");
- if (!ImmOp.isImm()) {
- if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
- Mips::NoRegister, Is32BitImm, IDLoc,
- Instructions))
- return true;
-
- return false;
}
- if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
- Is32BitImm, IDLoc, Instructions))
- return true;
+ if (!Offset.isImm())
+ return loadAndAddSymbolAddress(Offset.getExpr(), DstReg, BaseReg,
+ Is32BitAddress, IDLoc, Instructions);
- return false;
+ return loadImmediate(Offset.getImm(), DstReg, BaseReg, Is32BitAddress, true,
+ IDLoc, Instructions);
}
bool MipsAsmParser::loadAndAddSymbolAddress(
@@ -2046,67 +2362,102 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
warnIfNoMacro(IDLoc);
- if (Is32BitSym && isABI_N64())
- Warning(IDLoc, "instruction loads the 32-bit address of a 64-bit symbol");
-
- MCInst tmpInst;
- const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymExpr);
- const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
- const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+ const MCExpr *Symbol = cast<MCExpr>(SymExpr);
+ const MipsMCExpr *HiExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_ABS_HI, Symbol, getContext());
+ const MipsMCExpr *LoExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_ABS_LO, Symbol, getContext());
bool UseSrcReg = SrcReg != Mips::NoRegister;
+ // This is the 64-bit symbol address expansion.
+ if (ABI.ArePtrs64bit() && isGP64bit()) {
+ // We always need AT for the 64-bit expansion.
+ // If it is not available we exit.
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ const MipsMCExpr *HighestExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_HIGHEST, Symbol, getContext());
+ const MipsMCExpr *HigherExpr = MipsMCExpr::create(
+ MCSymbolRefExpr::VK_Mips_HIGHER, Symbol, getContext());
+
+ if (UseSrcReg && (DstReg == SrcReg)) {
+ // If $rs is the same as $rd:
+ // (d)la $rd, sym($rd) => lui $at, %highest(sym)
+ // daddiu $at, $at, %higher(sym)
+ // dsll $at, $at, 16
+ // daddiu $at, $at, %hi(sym)
+ // dsll $at, $at, 16
+ // daddiu $at, $at, %lo(sym)
+ // daddu $rd, $at, $rd
+ emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HigherExpr),
+ IDLoc, Instructions);
+ emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr), IDLoc,
+ Instructions);
+ emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc,
+ Instructions);
+ emitRRR(Mips::DADDu, DstReg, ATReg, SrcReg, IDLoc, Instructions);
+
+ return false;
+ }
+
+ // Otherwise, if the $rs is different from $rd or if $rs isn't specified:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
+ // lui $at, %hi(sym)
+ // daddiu $rd, $rd, %higher(sym)
+ // daddiu $at, $at, %lo(sym)
+ // dsll32 $rd, $rd, 0
+ // daddu $rd, $rd, $at
+ // (daddu $rd, $rd, $rs)
+ emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ Instructions);
+ emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc,
+ Instructions);
+ emitRRX(Mips::DADDiu, DstReg, DstReg, MCOperand::createExpr(HigherExpr),
+ IDLoc, Instructions);
+ emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc,
+ Instructions);
+ emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, Instructions);
+ emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, Instructions);
+ if (UseSrcReg)
+ emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, Instructions);
+
+ return false;
+ }
+
+ // And now, the 32-bit symbol address expansion:
+ // If $rs is the same as $rd:
+ // (d)la $rd, sym($rd) => lui $at, %hi(sym)
+ // ori $at, $at, %lo(sym)
+ // addu $rd, $at, $rd
+ // Otherwise, if the $rs is different from $rd or if $rs isn't specified:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %hi(sym)
+ // ori $rd, $rd, %lo(sym)
+ // (addu $rd, $rd, $rs)
unsigned TmpReg = DstReg;
if (UseSrcReg && (DstReg == SrcReg)) {
- // At this point we need AT to perform the expansions and we exit if it is
- // not available.
+ // If $rs is the same as $rd, we need to use AT.
+ // If it is not available we exit.
unsigned ATReg = getATReg(IDLoc);
if (!ATReg)
return true;
TmpReg = ATReg;
}
- if (!Is32BitSym) {
- // If it's a 64-bit architecture, expand to:
- // la d,sym => lui d,highest(sym)
- // ori d,d,higher(sym)
- // dsll d,d,16
- // ori d,d,hi16(sym)
- // dsll d,d,16
- // ori d,d,lo16(sym)
- const MCSymbolRefExpr *HighestExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHEST, getContext());
- const MCSymbolRefExpr *HigherExpr = MCSymbolRefExpr::create(
- &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext());
-
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createExpr(HighestExpr));
- Instructions.push_back(tmpInst);
-
- createLShiftOri<0>(MCOperand::createExpr(HigherExpr), TmpReg, SMLoc(),
- Instructions);
- createLShiftOri<16>(MCOperand::createExpr(HiExpr), TmpReg, SMLoc(),
- Instructions);
- createLShiftOri<16>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(),
- Instructions);
- } else {
- // Otherwise, expand to:
- // la d,sym => lui d,hi16(sym)
- // ori d,d,lo16(sym)
- tmpInst.setOpcode(Mips::LUi);
- tmpInst.addOperand(MCOperand::createReg(TmpReg));
- tmpInst.addOperand(MCOperand::createExpr(HiExpr));
- Instructions.push_back(tmpInst);
-
- emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), SMLoc(),
- Instructions);
- }
+ emitRX(Mips::LUi, TmpReg, MCOperand::createExpr(HiExpr), IDLoc, Instructions);
+ emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), IDLoc,
+ Instructions);
if (UseSrcReg)
- createAddu(DstReg, TmpReg, SrcReg, !Is32BitSym, Instructions);
+ emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions);
+ else
+ assert(DstReg == TmpReg);
return false;
}
@@ -2125,12 +2476,13 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(
Inst.addOperand(MCOperand::createExpr(Offset.getExpr()));
} else {
assert(Offset.isImm() && "expected immediate operand kind");
- if (isIntN(11, Offset.getImm())) {
+ if (isInt<11>(Offset.getImm())) {
// If offset fits into 11 bits then this instruction becomes microMIPS
// 16-bit unconditional branch instruction.
- Inst.setOpcode(Mips::B16_MM);
+ if (inMicroMipsMode())
+ Inst.setOpcode(hasMips32r6() ? Mips::BC16_MMR6 : Mips::B16_MM);
} else {
- if (!isIntN(17, Offset.getImm()))
+ if (!isInt<17>(Offset.getImm()))
Error(IDLoc, "branch target out of range");
if (OffsetToAlignment(Offset.getImm(), 1LL << 1))
Error(IDLoc, "branch to misaligned address");
@@ -2143,8 +2495,10 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(
}
Instructions.push_back(Inst);
- // If .set reorder is active, emit a NOP after the branch instruction.
- if (AssemblerOptions.back()->isReorder())
+ // If .set reorder is active and branch instruction has a delay slot,
+ // emit a NOP after it.
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder())
createNop(true, IDLoc, Instructions);
return false;
@@ -2175,30 +2529,21 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc,
}
int64_t ImmValue = ImmOp.getImm();
- if (ImmValue == 0) {
- MCInst BranchInst;
- BranchInst.setOpcode(OpCode);
- BranchInst.addOperand(DstRegOp);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MemOffsetOp);
- Instructions.push_back(BranchInst);
- } else {
+ if (ImmValue == 0)
+ emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc,
+ Instructions);
+ else {
warnIfNoMacro(IDLoc);
unsigned ATReg = getATReg(IDLoc);
if (!ATReg)
return true;
- if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), IDLoc,
- Instructions))
+ if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), true,
+ IDLoc, Instructions))
return true;
- MCInst BranchInst;
- BranchInst.setOpcode(OpCode);
- BranchInst.addOperand(DstRegOp);
- BranchInst.addOperand(MCOperand::createReg(ATReg));
- BranchInst.addOperand(MemOffsetOp);
- Instructions.push_back(BranchInst);
+ emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, Instructions);
}
return false;
}
@@ -2206,7 +2551,6 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc,
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions,
bool isLoad, bool isImmOpnd) {
- MCInst TempInst;
unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
unsigned TmpRegNum;
@@ -2227,8 +2571,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
HiOffset++;
} else
ExprOffset = Inst.getOperand(2).getExpr();
- // All instructions will have the same location.
- TempInst.setLoc(IDLoc);
// These are some of the types of expansions we perform here:
// 1) lw $8, sym => lui $8, %hi(sym)
// lw $8, %lo(sym)($8)
@@ -2267,40 +2609,20 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
return;
}
- TempInst.setOpcode(Mips::LUi);
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- if (isImmOpnd)
- TempInst.addOperand(MCOperand::createImm(HiOffset));
- else {
- const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi");
- TempInst.addOperand(MCOperand::createExpr(HiExpr));
- }
- // Add the instruction to the list.
- Instructions.push_back(TempInst);
- // Prepare TempInst for next instruction.
- TempInst.clear();
+ emitRX(Mips::LUi, TmpRegNum,
+ isImmOpnd ? MCOperand::createImm(HiOffset)
+ : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "hi")),
+ IDLoc, Instructions);
// Add temp register to base.
- if (BaseRegNum != Mips::ZERO) {
- TempInst.setOpcode(Mips::ADDu);
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- TempInst.addOperand(MCOperand::createReg(BaseRegNum));
- Instructions.push_back(TempInst);
- TempInst.clear();
- }
+ if (BaseRegNum != Mips::ZERO)
+ emitRRR(Mips::ADDu, TmpRegNum, TmpRegNum, BaseRegNum, IDLoc, Instructions);
// And finally, create original instruction with low part
// of offset and new base.
- TempInst.setOpcode(Inst.getOpcode());
- TempInst.addOperand(MCOperand::createReg(RegOpNum));
- TempInst.addOperand(MCOperand::createReg(TmpRegNum));
- if (isImmOpnd)
- TempInst.addOperand(MCOperand::createImm(LoOffset));
- else {
- const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo");
- TempInst.addOperand(MCOperand::createExpr(LoExpr));
- }
- Instructions.push_back(TempInst);
- TempInst.clear();
+ emitRRX(Inst.getOpcode(), RegOpNum, TmpRegNum,
+ isImmOpnd
+ ? MCOperand::createImm(LoOffset)
+ : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "lo")),
+ IDLoc, Instructions);
}
bool
@@ -2316,10 +2638,16 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
if (OpNum < 8 && Inst.getOperand(OpNum - 1).getImm() <= 60 &&
Inst.getOperand(OpNum - 1).getImm() >= 0 &&
- Inst.getOperand(OpNum - 2).getReg() == Mips::SP &&
- Inst.getOperand(OpNum - 3).getReg() == Mips::RA)
+ (Inst.getOperand(OpNum - 2).getReg() == Mips::SP ||
+ Inst.getOperand(OpNum - 2).getReg() == Mips::SP_64) &&
+ (Inst.getOperand(OpNum - 3).getReg() == Mips::RA ||
+ Inst.getOperand(OpNum - 3).getReg() == Mips::RA_64)) {
// It can be implemented as SWM16 or LWM16 instruction.
- NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM;
+ if (inMicroMipsMode() && hasMips32r6())
+ NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MMR6 : Mips::LWM16_MMR6;
+ else
+ NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM;
+ }
Inst.setOpcode(NewOpcode);
Instructions.push_back(Inst);
@@ -2328,44 +2656,126 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
+ bool EmittedNoMacroWarning = false;
unsigned PseudoOpcode = Inst.getOpcode();
unsigned SrcReg = Inst.getOperand(0).getReg();
- unsigned TrgReg = Inst.getOperand(1).getReg();
+ const MCOperand &TrgOp = Inst.getOperand(1);
const MCExpr *OffsetExpr = Inst.getOperand(2).getExpr();
unsigned ZeroSrcOpcode, ZeroTrgOpcode;
- bool ReverseOrderSLT, IsUnsigned, AcceptsEquality;
+ bool ReverseOrderSLT, IsUnsigned, IsLikely, AcceptsEquality;
+
+ unsigned TrgReg;
+ if (TrgOp.isReg())
+ TrgReg = TrgOp.getReg();
+ else if (TrgOp.isImm()) {
+ warnIfNoMacro(IDLoc);
+ EmittedNoMacroWarning = true;
+
+ TrgReg = getATReg(IDLoc);
+ if (!TrgReg)
+ return true;
+
+ switch(PseudoOpcode) {
+ default:
+ llvm_unreachable("unknown opcode for branch pseudo-instruction");
+ case Mips::BLTImmMacro:
+ PseudoOpcode = Mips::BLT;
+ break;
+ case Mips::BLEImmMacro:
+ PseudoOpcode = Mips::BLE;
+ break;
+ case Mips::BGEImmMacro:
+ PseudoOpcode = Mips::BGE;
+ break;
+ case Mips::BGTImmMacro:
+ PseudoOpcode = Mips::BGT;
+ break;
+ case Mips::BLTUImmMacro:
+ PseudoOpcode = Mips::BLTU;
+ break;
+ case Mips::BLEUImmMacro:
+ PseudoOpcode = Mips::BLEU;
+ break;
+ case Mips::BGEUImmMacro:
+ PseudoOpcode = Mips::BGEU;
+ break;
+ case Mips::BGTUImmMacro:
+ PseudoOpcode = Mips::BGTU;
+ break;
+ case Mips::BLTLImmMacro:
+ PseudoOpcode = Mips::BLTL;
+ break;
+ case Mips::BLELImmMacro:
+ PseudoOpcode = Mips::BLEL;
+ break;
+ case Mips::BGELImmMacro:
+ PseudoOpcode = Mips::BGEL;
+ break;
+ case Mips::BGTLImmMacro:
+ PseudoOpcode = Mips::BGTL;
+ break;
+ case Mips::BLTULImmMacro:
+ PseudoOpcode = Mips::BLTUL;
+ break;
+ case Mips::BLEULImmMacro:
+ PseudoOpcode = Mips::BLEUL;
+ break;
+ case Mips::BGEULImmMacro:
+ PseudoOpcode = Mips::BGEUL;
+ break;
+ case Mips::BGTULImmMacro:
+ PseudoOpcode = Mips::BGTUL;
+ break;
+ }
+
+ if (loadImmediate(TrgOp.getImm(), TrgReg, Mips::NoRegister, !isGP64bit(),
+ false, IDLoc, Instructions))
+ return true;
+ }
switch (PseudoOpcode) {
case Mips::BLT:
case Mips::BLTU:
+ case Mips::BLTL:
+ case Mips::BLTUL:
AcceptsEquality = false;
ReverseOrderSLT = false;
- IsUnsigned = (PseudoOpcode == Mips::BLTU);
+ IsUnsigned = ((PseudoOpcode == Mips::BLTU) || (PseudoOpcode == Mips::BLTUL));
+ IsLikely = ((PseudoOpcode == Mips::BLTL) || (PseudoOpcode == Mips::BLTUL));
ZeroSrcOpcode = Mips::BGTZ;
ZeroTrgOpcode = Mips::BLTZ;
break;
case Mips::BLE:
case Mips::BLEU:
+ case Mips::BLEL:
+ case Mips::BLEUL:
AcceptsEquality = true;
ReverseOrderSLT = true;
- IsUnsigned = (PseudoOpcode == Mips::BLEU);
+ IsUnsigned = ((PseudoOpcode == Mips::BLEU) || (PseudoOpcode == Mips::BLEUL));
+ IsLikely = ((PseudoOpcode == Mips::BLEL) || (PseudoOpcode == Mips::BLEUL));
ZeroSrcOpcode = Mips::BGEZ;
ZeroTrgOpcode = Mips::BLEZ;
break;
case Mips::BGE:
case Mips::BGEU:
+ case Mips::BGEL:
+ case Mips::BGEUL:
AcceptsEquality = true;
ReverseOrderSLT = false;
- IsUnsigned = (PseudoOpcode == Mips::BGEU);
+ IsUnsigned = ((PseudoOpcode == Mips::BGEU) || (PseudoOpcode == Mips::BGEUL));
+ IsLikely = ((PseudoOpcode == Mips::BGEL) || (PseudoOpcode == Mips::BGEUL));
ZeroSrcOpcode = Mips::BLEZ;
ZeroTrgOpcode = Mips::BGEZ;
break;
case Mips::BGT:
case Mips::BGTU:
+ case Mips::BGTL:
+ case Mips::BGTUL:
AcceptsEquality = false;
ReverseOrderSLT = true;
- IsUnsigned = (PseudoOpcode == Mips::BGTU);
+ IsUnsigned = ((PseudoOpcode == Mips::BGTU) || (PseudoOpcode == Mips::BGTUL));
+ IsLikely = ((PseudoOpcode == Mips::BGTL) || (PseudoOpcode == Mips::BGTUL));
ZeroSrcOpcode = Mips::BLTZ;
ZeroTrgOpcode = Mips::BGTZ;
break;
@@ -2373,7 +2783,6 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
llvm_unreachable("unknown opcode for branch pseudo-instruction");
}
- MCInst BranchInst;
bool IsTrgRegZero = (TrgReg == Mips::ZERO);
bool IsSrcRegZero = (SrcReg == Mips::ZERO);
if (IsSrcRegZero && IsTrgRegZero) {
@@ -2381,51 +2790,37 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
// with GAS' behaviour. However, they may not generate the most efficient
// code in some circumstances.
if (PseudoOpcode == Mips::BLT) {
- BranchInst.setOpcode(Mips::BLTZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BLTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
return false;
}
if (PseudoOpcode == Mips::BLE) {
- BranchInst.setOpcode(Mips::BLEZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BLEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
if (PseudoOpcode == Mips::BGE) {
- BranchInst.setOpcode(Mips::BGEZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BGEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
if (PseudoOpcode == Mips::BGT) {
- BranchInst.setOpcode(Mips::BGTZ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(Mips::BGTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
return false;
}
if (PseudoOpcode == Mips::BGTU) {
- BranchInst.setOpcode(Mips::BNE);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(Mips::BNE, Mips::ZERO, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
return false;
}
if (AcceptsEquality) {
// If both registers are $0 and the pseudo-branch accepts equality, it
// will always be taken, so we emit an unconditional branch.
- BranchInst.setOpcode(Mips::BEQ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
@@ -2449,11 +2844,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
// the pseudo-branch will always be taken, so we emit an unconditional
// branch.
// This only applies to unsigned pseudo-branches.
- BranchInst.setOpcode(Mips::BEQ);
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
Warning(IDLoc, "branch is always taken");
return false;
}
@@ -2470,21 +2862,17 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
//
// Because only BLEU and BGEU branch on equality, we can use the
// AcceptsEquality variable to decide when to emit the BEQZ.
- BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE);
- BranchInst.addOperand(
- MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRX(AcceptsEquality ? Mips::BEQ : Mips::BNE,
+ IsSrcRegZero ? TrgReg : SrcReg, Mips::ZERO,
+ MCOperand::createExpr(OffsetExpr), IDLoc, Instructions);
return false;
}
// If we have a signed pseudo-branch and one of the registers is $0,
// we can use an appropriate compare-to-zero branch. We select which one
// to use in the switch statement above.
- BranchInst.setOpcode(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode);
- BranchInst.addOperand(MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRX(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode,
+ IsSrcRegZero ? TrgReg : SrcReg, MCOperand::createExpr(OffsetExpr),
+ IDLoc, Instructions);
return false;
}
@@ -2494,7 +2882,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
if (!ATRegNum)
return true;
- warnIfNoMacro(IDLoc);
+ if (!EmittedNoMacroWarning)
+ warnIfNoMacro(IDLoc);
// SLT fits well with 2 of our 4 pseudo-branches:
// BLT, where $rs < $rt, translates into "slt $at, $rs, $rt" and
@@ -2511,23 +2900,135 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
//
// The same applies to the unsigned variants, except that SLTu is used
// instead of SLT.
- MCInst SetInst;
- SetInst.setOpcode(IsUnsigned ? Mips::SLTu : Mips::SLT);
- SetInst.addOperand(MCOperand::createReg(ATRegNum));
- SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? TrgReg : SrcReg));
- SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? SrcReg : TrgReg));
- Instructions.push_back(SetInst);
-
- BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE);
- BranchInst.addOperand(MCOperand::createReg(ATRegNum));
- BranchInst.addOperand(MCOperand::createReg(Mips::ZERO));
- BranchInst.addOperand(MCOperand::createExpr(OffsetExpr));
- Instructions.push_back(BranchInst);
+ emitRRR(IsUnsigned ? Mips::SLTu : Mips::SLT, ATRegNum,
+ ReverseOrderSLT ? TrgReg : SrcReg, ReverseOrderSLT ? SrcReg : TrgReg,
+ IDLoc, Instructions);
+
+ emitRRX(IsLikely ? (AcceptsEquality ? Mips::BEQL : Mips::BNEL)
+ : (AcceptsEquality ? Mips::BEQ : Mips::BNE),
+ ATRegNum, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc,
+ Instructions);
return false;
}
-bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
- SmallVectorImpl<MCInst> &Instructions) {
+bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions,
+ const bool IsMips64, const bool Signed) {
+ if (hasMips32r6()) {
+ Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
+ return false;
+ }
+
+ warnIfNoMacro(IDLoc);
+
+ const MCOperand &RsRegOp = Inst.getOperand(0);
+ assert(RsRegOp.isReg() && "expected register operand kind");
+ unsigned RsReg = RsRegOp.getReg();
+
+ const MCOperand &RtRegOp = Inst.getOperand(1);
+ assert(RtRegOp.isReg() && "expected register operand kind");
+ unsigned RtReg = RtRegOp.getReg();
+ unsigned DivOp;
+ unsigned ZeroReg;
+
+ if (IsMips64) {
+ DivOp = Signed ? Mips::DSDIV : Mips::DUDIV;
+ ZeroReg = Mips::ZERO_64;
+ } else {
+ DivOp = Signed ? Mips::SDIV : Mips::UDIV;
+ ZeroReg = Mips::ZERO;
+ }
+
+ bool UseTraps = useTraps();
+
+ if (RsReg == Mips::ZERO || RsReg == Mips::ZERO_64) {
+ if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)
+ Warning(IDLoc, "dividing zero by zero");
+ if (IsMips64) {
+ if (Signed && (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)) {
+ if (UseTraps) {
+ emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions);
+ return false;
+ }
+
+ emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions);
+ return false;
+ }
+ } else {
+ emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions);
+ return false;
+ }
+ }
+
+ if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) {
+ Warning(IDLoc, "division by zero");
+ if (Signed) {
+ if (UseTraps) {
+ emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions);
+ return false;
+ }
+
+ emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions);
+ return false;
+ }
+ }
+
+ // FIXME: The values for these two BranchTarget variables may be different in
+ // micromips. These magic numbers need to be removed.
+ unsigned BranchTargetNoTraps;
+ unsigned BranchTarget;
+
+ if (UseTraps) {
+ BranchTarget = IsMips64 ? 12 : 8;
+ emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions);
+ } else {
+ BranchTarget = IsMips64 ? 20 : 16;
+ BranchTargetNoTraps = 8;
+ // Branch to the li instruction.
+ emitRRI(Mips::BNE, RtReg, ZeroReg, BranchTargetNoTraps, IDLoc,
+ Instructions);
+ }
+
+ emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions);
+
+ if (!UseTraps)
+ emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions);
+
+ if (!Signed) {
+ emitR(Mips::MFLO, RsReg, IDLoc, Instructions);
+ return false;
+ }
+
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ emitRRI(Mips::ADDiu, ATReg, ZeroReg, -1, IDLoc, Instructions);
+ if (IsMips64) {
+ // Branch to the mflo instruction.
+ emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions);
+ emitRRI(Mips::ADDiu, ATReg, ZeroReg, 1, IDLoc, Instructions);
+ emitRRI(Mips::DSLL32, ATReg, ATReg, 0x1f, IDLoc, Instructions);
+ } else {
+ // Branch to the mflo instruction.
+ emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions);
+ emitRI(Mips::LUi, ATReg, (uint16_t)0x8000, IDLoc, Instructions);
+ }
+
+ if (UseTraps)
+ emitRRI(Mips::TEQ, RsReg, ATReg, 0x6, IDLoc, Instructions);
+ else {
+ // Branch to the mflo instruction.
+ emitRRI(Mips::BNE, RsReg, ATReg, BranchTargetNoTraps, IDLoc, Instructions);
+ emitRRI(Mips::SLL, ZeroReg, ZeroReg, 0, IDLoc, Instructions);
+ emitII(Mips::BREAK, 0x6, 0, IDLoc, Instructions);
+ }
+ emitR(Mips::MFLO, RsReg, IDLoc, Instructions);
+ return false;
+}
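
As a reading aid for the branch structure above, this is roughly what the signed 32-bit case produces when trapping is disabled, written out as comments. The registers in "div $4, $5" are chosen for illustration only, and the branch landing points follow the code's own comments rather than assembler output:

//   bne   $5, $zero, 8     // divisor is non-zero: branch past the break
//   div   $4, $5           // executes in the bne delay slot
//   break 7                // division by zero
//   addiu $at, $zero, -1
//   bne   $5, $at, 16      // divisor is not -1: branch to the mflo
//   lui   $at, 0x8000      // most negative 32-bit value (delay slot)
//   bne   $4, $at, 8       // dividend is not INT_MIN: branch to the mflo
//   sll   $zero, $zero, 0  // nop in the delay slot
//   break 6                // INT_MIN / -1 overflow
//   mflo  $4
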
+
+bool MipsAsmParser::expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
if (hasMips32r6() || hasMips64r6()) {
Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
return false;
@@ -2562,7 +3063,7 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
LoadedOffsetInAT = true;
if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
- IDLoc, Instructions))
+ true, IDLoc, Instructions))
return true;
// NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
@@ -2590,33 +3091,15 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc,
unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg;
- MCInst TmpInst;
- TmpInst.setOpcode(Mips::LBu);
- TmpInst.addOperand(MCOperand::createReg(FirstLbuDstReg));
- TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
- TmpInst.addOperand(MCOperand::createImm(FirstLbuOffset));
- Instructions.push_back(TmpInst);
-
- TmpInst.clear();
- TmpInst.setOpcode(Mips::LBu);
- TmpInst.addOperand(MCOperand::createReg(SecondLbuDstReg));
- TmpInst.addOperand(MCOperand::createReg(LbuSrcReg));
- TmpInst.addOperand(MCOperand::createImm(SecondLbuOffset));
- Instructions.push_back(TmpInst);
-
- TmpInst.clear();
- TmpInst.setOpcode(Mips::SLL);
- TmpInst.addOperand(MCOperand::createReg(SllReg));
- TmpInst.addOperand(MCOperand::createReg(SllReg));
- TmpInst.addOperand(MCOperand::createImm(8));
- Instructions.push_back(TmpInst);
-
- TmpInst.clear();
- TmpInst.setOpcode(Mips::OR);
- TmpInst.addOperand(MCOperand::createReg(DstReg));
- TmpInst.addOperand(MCOperand::createReg(DstReg));
- TmpInst.addOperand(MCOperand::createReg(ATReg));
- Instructions.push_back(TmpInst);
+ emitRRI(Signed ? Mips::LB : Mips::LBu, FirstLbuDstReg, LbuSrcReg,
+ FirstLbuOffset, IDLoc, Instructions);
+
+ emitRRI(Mips::LBu, SecondLbuDstReg, LbuSrcReg, SecondLbuOffset, IDLoc,
+ Instructions);
+
+ emitRRI(Mips::SLL, SllReg, SllReg, 8, IDLoc, Instructions);
+
+ emitRRR(Mips::OR, DstReg, DstReg, ATReg, IDLoc, Instructions);
return false;
}
@@ -2654,7 +3137,7 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc,
warnIfNoMacro(IDLoc);
if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
- IDLoc, Instructions))
+ true, IDLoc, Instructions))
return true;
// NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
@@ -2677,37 +3160,373 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc,
RightLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
}
- MCInst LeftLoadInst;
- LeftLoadInst.setOpcode(Mips::LWL);
- LeftLoadInst.addOperand(DstRegOp);
- LeftLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
- LeftLoadInst.addOperand(MCOperand::createImm(LeftLoadOffset));
- Instructions.push_back(LeftLoadInst);
+ emitRRI(Mips::LWL, DstRegOp.getReg(), FinalSrcReg, LeftLoadOffset, IDLoc,
+ Instructions);
- MCInst RightLoadInst;
- RightLoadInst.setOpcode(Mips::LWR);
- RightLoadInst.addOperand(DstRegOp);
- RightLoadInst.addOperand(MCOperand::createReg(FinalSrcReg));
- RightLoadInst.addOperand(MCOperand::createImm(RightLoadOffset ));
- Instructions.push_back(RightLoadInst);
+ emitRRI(Mips::LWR, DstRegOp.getReg(), FinalSrcReg, RightLoadOffset, IDLoc,
+ Instructions);
return false;
}
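
For orientation, the net effect of this expansion in the simple register-offset case is a single lwl/lwr pair covering the two ends of the word; the register numbers and the little-endian offset assignment below are assumptions for illustration, not captured output:

//   ulw $8, 0($9)   =>   lwl $8, 3($9)
//                        lwr $8, 0($9)
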
+bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned FinalDstReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+
+ bool Is32Bit = isInt<32>(ImmValue) || isUInt<32>(ImmValue);
+
+ unsigned FinalOpcode = Inst.getOpcode();
+
+ if (DstReg == SrcReg) {
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+ FinalDstReg = DstReg;
+ DstReg = ATReg;
+ }
+
+ if (!loadImmediate(ImmValue, DstReg, Mips::NoRegister, Is32Bit, false, Inst.getLoc(), Instructions)) {
+ switch (FinalOpcode) {
+ default:
+ llvm_unreachable("unimplemented expansion");
+ case (Mips::ADDi):
+ FinalOpcode = Mips::ADD;
+ break;
+ case (Mips::ADDiu):
+ FinalOpcode = Mips::ADDu;
+ break;
+ case (Mips::ANDi):
+ FinalOpcode = Mips::AND;
+ break;
+ case (Mips::NORImm):
+ FinalOpcode = Mips::NOR;
+ break;
+ case (Mips::ORi):
+ FinalOpcode = Mips::OR;
+ break;
+ case (Mips::SLTi):
+ FinalOpcode = Mips::SLT;
+ break;
+ case (Mips::SLTiu):
+ FinalOpcode = Mips::SLTu;
+ break;
+ case (Mips::XORi):
+ FinalOpcode = Mips::XOR;
+ break;
+ }
+
+ if (FinalDstReg == Mips::NoRegister)
+ emitRRR(FinalOpcode, DstReg, DstReg, SrcReg, IDLoc, Instructions);
+ else
+ emitRRR(FinalOpcode, FinalDstReg, FinalDstReg, DstReg, IDLoc,
+ Instructions);
+ return false;
+ }
+ return true;
+}
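
A concrete instance of this expansion, chosen for illustration: the destination differs from the source, so $at is not needed, and the immediate does not fit 16 bits so it takes the 32-bit lui/ori path of loadImmediate before the register form of the operation is emitted.

//   andi $2, $3, 0x12345   =>   lui  $2, 0x1
//                               ori  $2, $2, 0x2345
//                               and  $2, $2, $3
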
+
+bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ unsigned TReg = Inst.getOperand(2).getReg();
+ unsigned TmpReg = DReg;
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ if (hasMips32r2()) {
+
+ if (DReg == SReg) {
+ TmpReg = getATReg(Inst.getLoc());
+ if (!TmpReg)
+ return true;
+ }
+
+ if (Inst.getOpcode() == Mips::ROL) {
+ emitRRR(Mips::SUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::ROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ if (Inst.getOpcode() == Mips::ROR) {
+ emitRRR(Mips::ROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ return true;
+ }
+
+ if (hasMips32()) {
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::ROL:
+ FirstShift = Mips::SRLV;
+ SecondShift = Mips::SLLV;
+ break;
+ case Mips::ROR:
+ FirstShift = Mips::SLLV;
+ SecondShift = Mips::SRLV;
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ emitRRR(Mips::SUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions);
+ emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
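
On targets without the mips32r2 rotate instructions, the variable-amount case falls back to the shift/or sequence above; for example (registers illustrative):

//   rol $2, $3, $4   =>   subu $at, $zero, $4   // negate the rotate amount
//                         srlv $at, $3, $at     // the bits wrapped around
//                         sllv $2, $3, $4
//                         or   $2, $2, $at
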
+
+bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm();
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ if (hasMips32r2()) {
+
+ if (Inst.getOpcode() == Mips::ROLImm) {
+ uint64_t MaxShift = 32;
+ uint64_t ShiftValue = ImmValue;
+ if (ImmValue != 0)
+ ShiftValue = MaxShift - ImmValue;
+ emitRRI(Mips::ROTR, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ if (Inst.getOpcode() == Mips::RORImm) {
+ emitRRI(Mips::ROTR, DReg, SReg, ImmValue, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ return true;
+ }
+
+ if (hasMips32()) {
+
+ if (ImmValue == 0) {
+ emitRRI(Mips::SRL, DReg, SReg, 0, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::ROLImm:
+ FirstShift = Mips::SLL;
+ SecondShift = Mips::SRL;
+ break;
+ case Mips::RORImm:
+ FirstShift = Mips::SRL;
+ SecondShift = Mips::SLL;
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ emitRRI(FirstShift, ATReg, SReg, ImmValue, Inst.getLoc(), Instructions);
+ emitRRI(SecondShift, DReg, SReg, 32 - ImmValue, Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
+
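Both rotate expansions above lean on the identity rol(x, n) == ror(x, (32 - n) mod 32): the pre-R2 path builds the rotate from SRLV/SLLV plus OR, while the R2 path either flips the immediate (ROLImm -> ROTR) or negates the amount in a scratch register (ROL -> SUBu + ROTRV, where the hardware reads only the low five bits of the shift register). A small self-check of those identities, written as ordinary C++ rather than emitted MIPS:

#include <cassert>
#include <cstdint>

// ror32 the way the pre-R2 expansion builds it: shift right, shift left, OR.
static uint32_t ror32(uint32_t X, unsigned N) {
  N &= 31;                 // variable shifts/rotates use the low 5 bits only
  if (N == 0)
    return X;
  return (X >> N) | (X << (32 - N));
}

static uint32_t rol32(uint32_t X, unsigned N) {
  N &= 31;
  if (N == 0)
    return X;
  return (X << N) | (X >> (32 - N));
}

int main() {
  uint32_t X = 0x12345678;
  for (unsigned N = 0; N < 32; ++N) {
    // ROLImm on R2 becomes ROTR by (32 - N) % 32 ...
    assert(ror32(X, (32 - N) % 32) == rol32(X, N));
    // ... and register-form ROL becomes SUBu (negate) + ROTRV, with the
    // rotate amount taken modulo 32 by the hardware.
    assert(ror32(X, 0u - N) == rol32(X, N));
  }
  return 0;
}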
+bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ unsigned TReg = Inst.getOperand(2).getReg();
+ unsigned TmpReg = DReg;
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ if (hasMips64r2()) {
+
+ if (TmpReg == SReg) {
+ TmpReg = getATReg(Inst.getLoc());
+ if (!TmpReg)
+ return true;
+ }
+
+ if (Inst.getOpcode() == Mips::DROL) {
+ emitRRR(Mips::DSUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::DROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ if (Inst.getOpcode() == Mips::DROR) {
+ emitRRR(Mips::DROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ return true;
+ }
+
+ if (hasMips64()) {
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::DROL:
+ FirstShift = Mips::DSRLV;
+ SecondShift = Mips::DSLLV;
+ break;
+ case Mips::DROR:
+ FirstShift = Mips::DSLLV;
+ SecondShift = Mips::DSRLV;
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ emitRRR(Mips::DSUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions);
+ emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions);
+ emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
+
+bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DReg = Inst.getOperand(0).getReg();
+ unsigned SReg = Inst.getOperand(1).getReg();
+ int64_t ImmValue = Inst.getOperand(2).getImm() % 64;
+
+ unsigned FirstShift = Mips::NOP;
+ unsigned SecondShift = Mips::NOP;
+
+ MCInst TmpInst;
+
+ if (hasMips64r2()) {
+
+ unsigned FinalOpcode = Mips::NOP;
+ if (ImmValue == 0)
+ FinalOpcode = Mips::DROTR;
+ else if (ImmValue % 32 == 0)
+ FinalOpcode = Mips::DROTR32;
+ else if ((ImmValue >= 1) && (ImmValue <= 32)) {
+ if (Inst.getOpcode() == Mips::DROLImm)
+ FinalOpcode = Mips::DROTR32;
+ else
+ FinalOpcode = Mips::DROTR;
+ } else if (ImmValue >= 33) {
+ if (Inst.getOpcode() == Mips::DROLImm)
+ FinalOpcode = Mips::DROTR;
+ else
+ FinalOpcode = Mips::DROTR32;
+ }
+
+ uint64_t ShiftValue = ImmValue % 32;
+ if (Inst.getOpcode() == Mips::DROLImm)
+ ShiftValue = (32 - ImmValue % 32) % 32;
+
+ emitRRI(FinalOpcode, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ if (hasMips64()) {
+
+ if (ImmValue == 0) {
+ emitRRI(Mips::DSRL, DReg, SReg, 0, Inst.getLoc(), Instructions);
+ return false;
+ }
+
+ switch (Inst.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected instruction opcode");
+ case Mips::DROLImm:
+ if ((ImmValue >= 1) && (ImmValue <= 31)) {
+ FirstShift = Mips::DSLL;
+ SecondShift = Mips::DSRL32;
+ }
+ if (ImmValue == 32) {
+ FirstShift = Mips::DSLL32;
+ SecondShift = Mips::DSRL32;
+ }
+ if ((ImmValue >= 33) && (ImmValue <= 63)) {
+ FirstShift = Mips::DSLL32;
+ SecondShift = Mips::DSRL;
+ }
+ break;
+ case Mips::DRORImm:
+ if ((ImmValue >= 1) && (ImmValue <= 31)) {
+ FirstShift = Mips::DSRL;
+ SecondShift = Mips::DSLL32;
+ }
+ if (ImmValue == 32) {
+ FirstShift = Mips::DSRL32;
+ SecondShift = Mips::DSLL32;
+ }
+ if ((ImmValue >= 33) && (ImmValue <= 63)) {
+ FirstShift = Mips::DSRL32;
+ SecondShift = Mips::DSLL;
+ }
+ break;
+ }
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ emitRRI(FirstShift, ATReg, SReg, ImmValue % 32, Inst.getLoc(), Instructions);
+ emitRRI(SecondShift, DReg, SReg, (32 - ImmValue % 32) % 32, Inst.getLoc(), Instructions);
+ emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions);
+
+ return false;
+ }
+
+ return true;
+}
+
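expandDRotationImm has to fit a six-bit rotate amount into a five-bit shift field, so amounts of 32 and above switch to the DROTR32/DSLL32/DSRL32 forms, which implicitly add 32. A quick check of that decomposition for the rotate-right direction (plain C++, illustration only):

#include <cassert>
#include <cstdint>

static uint64_t ror64(uint64_t X, unsigned N) {
  N &= 63;
  if (N == 0)
    return X;
  return (X >> N) | (X << (64 - N));
}

int main() {
  uint64_t X = 0x0123456789abcdefULL;
  for (unsigned S = 0; S < 64; ++S) {
    // The 5-bit field holds S % 32; choosing DROTR32 instead of DROTR
    // supplies the missing 32 for amounts of 32 and above.
    bool UseDROTR32 = (S >= 32);
    unsigned Field = S % 32;
    unsigned Reconstructed = (UseDROTR32 ? 32u : 0u) + Field;
    assert(ror64(X, Reconstructed) == ror64(X, S));
  }
  return 0;
}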
void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
- MCInst NopInst;
- if (hasShortDelaySlot) {
- NopInst.setOpcode(Mips::MOVE16_MM);
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- } else {
- NopInst.setOpcode(Mips::SLL);
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
- NopInst.addOperand(MCOperand::createImm(0));
- }
- Instructions.push_back(NopInst);
+ if (hasShortDelaySlot)
+ emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, Instructions);
+ else
+ emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, Instructions);
}
void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
@@ -2717,6 +3536,24 @@ void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg,
Instructions);
}
+void MipsAsmParser::createCpRestoreMemOp(
+ bool IsLoad, int StackOffset, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ // If the offset cannot fit into 16 bits, we need to expand.
+ if (!isInt<16>(StackOffset)) {
+ MCInst MemInst;
+ MemInst.setOpcode(IsLoad ? Mips::LW : Mips::SW);
+ MemInst.addOperand(MCOperand::createReg(Mips::GP));
+ MemInst.addOperand(MCOperand::createReg(Mips::SP));
+ MemInst.addOperand(MCOperand::createImm(StackOffset));
+ expandMemInst(MemInst, IDLoc, Instructions, IsLoad, true /*HasImmOpnd*/);
+ return;
+ }
+
+ emitRRI(IsLoad ? Mips::LW : Mips::SW, Mips::GP, Mips::SP, StackOffset, IDLoc,
+ Instructions);
+}
+
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// As described by the Mips32r2 spec, the registers Rd and Rs for
// jalr.hb must be different.
@@ -2729,6 +3566,17 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
+static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands,
+ uint64_t ErrorInfo) {
+ if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) {
+ SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ return Loc;
+ return ErrorLoc;
+ }
+ return Loc;
+}
+
bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -2745,7 +3593,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (processInstruction(Inst, IDLoc, Instructions))
return true;
for (unsigned i = 0; i < Instructions.size(); i++)
- Out.EmitInstruction(Instructions[i], STI);
+ Out.EmitInstruction(Instructions[i], getSTI());
return false;
}
case Match_MissingFeature:
@@ -2757,7 +3605,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((MipsOperand &)*Operands[ErrorInfo]).getStartLoc();
+ ErrorLoc = Operands[ErrorInfo]->getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -2768,6 +3616,58 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "invalid instruction");
case Match_RequiresDifferentSrcAndDst:
return Error(IDLoc, "source and destination must be different");
+ case Match_Immz:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
+ case Match_UImm1_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 1-bit unsigned immediate");
+ case Match_UImm2_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 2-bit unsigned immediate");
+ case Match_UImm2_1:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 1 .. 4");
+ case Match_UImm3_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 3-bit unsigned immediate");
+ case Match_UImm4_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 4-bit unsigned immediate");
+ case Match_UImm5_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 5-bit unsigned immediate");
+ case Match_UImm5_1:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 1 .. 32");
+ case Match_UImm5_32:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 32 .. 63");
+ case Match_UImm5_33:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected immediate in range 33 .. 64");
+ case Match_UImm5_0_Report_UImm6:
+ // This is used on UImm5 operands that have a corresponding UImm5_32
+ // operand to avoid confusing the user.
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 6-bit unsigned immediate");
+ case Match_UImm5_Lsl2:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected both 7-bit unsigned immediate and multiple of 4");
+ case Match_UImm6_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 6-bit unsigned immediate");
+ case Match_SImm6:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 6-bit signed immediate");
+ case Match_UImm7_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 7-bit unsigned immediate");
+ case Match_UImm8_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 8-bit unsigned immediate");
+ case Match_UImm10_0:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 10-bit unsigned immediate");
}
llvm_unreachable("Implement any new match types added!");
@@ -3264,7 +4164,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::LParen)) {
MipsOperand &Mnemonic = static_cast<MipsOperand &>(*Operands[0]);
- if (Mnemonic.getToken() == "la") {
+ if (Mnemonic.getToken() == "la" || Mnemonic.getToken() == "dla") {
SMLoc E =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this));
@@ -3598,12 +4498,15 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
if (RegRange) {
// Remove last register operand because registers from register range
// should be inserted first.
- if (RegNo == Mips::RA) {
+ if ((isGP64bit() && RegNo == Mips::RA_64) ||
+ (!isGP64bit() && RegNo == Mips::RA)) {
Regs.push_back(RegNo);
} else {
unsigned TmpReg = PrevReg + 1;
while (TmpReg <= RegNo) {
- if ((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) {
+ if ((((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) && !isGP64bit()) ||
+ (((TmpReg < Mips::S0_64) || (TmpReg > Mips::S7_64)) &&
+ isGP64bit())) {
Error(E, "invalid register operand");
return MatchOperand_ParseFail;
}
@@ -3615,16 +4518,23 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
RegRange = false;
} else {
- if ((PrevReg == Mips::NoRegister) && (RegNo != Mips::S0) &&
- (RegNo != Mips::RA)) {
+ if ((PrevReg == Mips::NoRegister) &&
+ ((isGP64bit() && (RegNo != Mips::S0_64) && (RegNo != Mips::RA_64)) ||
+ (!isGP64bit() && (RegNo != Mips::S0) && (RegNo != Mips::RA)))) {
Error(E, "$16 or $31 expected");
return MatchOperand_ParseFail;
- } else if (((RegNo < Mips::S0) || (RegNo > Mips::S7)) &&
- (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ } else if (!(((RegNo == Mips::FP || RegNo == Mips::RA ||
+ (RegNo >= Mips::S0 && RegNo <= Mips::S7)) &&
+ !isGP64bit()) ||
+ ((RegNo == Mips::FP_64 || RegNo == Mips::RA_64 ||
+ (RegNo >= Mips::S0_64 && RegNo <= Mips::S7_64)) &&
+ isGP64bit()))) {
Error(E, "invalid register operand");
return MatchOperand_ParseFail;
} else if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) &&
- (RegNo != Mips::FP) && (RegNo != Mips::RA)) {
+ ((RegNo != Mips::FP && RegNo != Mips::RA && !isGP64bit()) ||
+ (RegNo != Mips::FP_64 && RegNo != Mips::RA_64 &&
+ isGP64bit()))) {
Error(E, "consecutive register numbers expected");
return MatchOperand_ParseFail;
}
@@ -4152,6 +5062,7 @@ bool MipsAsmParser::parseSetPopDirective() {
if (AssemblerOptions.size() == 2)
return reportParseError(Loc, ".set pop with no .set push");
+ MCSubtargetInfo &STI = copySTI();
AssemblerOptions.pop_back();
setAvailableFeatures(
ComputeAvailableFeatures(AssemblerOptions.back()->getFeatures()));
@@ -4225,6 +5136,7 @@ bool MipsAsmParser::parseSetMips0Directive() {
return reportParseError("unexpected token, expected end of statement");
// Reset assembler options to their initial values.
+ MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(AssemblerOptions.front()->getFeatures()));
STI.setFeatureBits(AssemblerOptions.front()->getFeatures());
@@ -4366,6 +5278,14 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) {
return true;
}
+// Used to determine if .cpload, .cprestore, and .cpsetup have any effect.
+// In this class, it is only used for .cprestore.
+// FIXME: Only keep track of IsPicEnabled in one place, instead of in both
+// MipsTargetELFStreamer and MipsAsmParser.
+bool MipsAsmParser::isPicAndNotNxxAbi() {
+ return inPicMode() && !(isABI_N32() || isABI_N64());
+}
+
bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
if (AssemblerOptions.back()->isReorder())
Warning(Loc, ".cpload should be inside a noreorder section");
@@ -4398,6 +5318,54 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
return false;
}
+bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) {
+ MCAsmParser &Parser = getParser();
+
+ // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it
+ // is used in non-PIC mode.
+
+ if (inMips16Mode()) {
+ reportParseError(".cprestore is not supported in Mips16 mode");
+ return false;
+ }
+
+ // Get the stack offset value.
+ const MCExpr *StackOffset;
+ int64_t StackOffsetVal;
+ if (Parser.parseExpression(StackOffset)) {
+ reportParseError("expected stack offset value");
+ return false;
+ }
+
+ if (!StackOffset->evaluateAsAbsolute(StackOffsetVal)) {
+ reportParseError("stack offset is not an absolute expression");
+ return false;
+ }
+
+ if (StackOffsetVal < 0) {
+ Warning(Loc, ".cprestore with negative stack offset has no effect");
+ IsCpRestoreSet = false;
+ } else {
+ IsCpRestoreSet = true;
+ CpRestoreOffset = StackOffsetVal;
+ }
+
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ // Store the $gp on the stack.
+ SmallVector<MCInst, 3> StoreInsts;
+ createCpRestoreMemOp(false /*IsLoad*/, CpRestoreOffset /*StackOffset*/, Loc,
+ StoreInsts);
+
+ getTargetStreamer().emitDirectiveCpRestore(StoreInsts, CpRestoreOffset);
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
bool MipsAsmParser::parseDirectiveCPSetup() {
MCAsmParser &Parser = getParser();
unsigned FuncReg;
@@ -4427,16 +5395,19 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
ResTy = parseAnyRegister(TmpReg);
if (ResTy == MatchOperand_NoMatch) {
- const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Integer)) {
- Save = Tok.getIntVal();
- SaveIsReg = false;
- Parser.Lex();
- } else {
- reportParseError("expected save register or stack offset");
+ const MCExpr *OffsetExpr;
+ int64_t OffsetVal;
+ SMLoc ExprLoc = getLexer().getLoc();
+
+ if (Parser.parseExpression(OffsetExpr) ||
+ !OffsetExpr->evaluateAsAbsolute(OffsetVal)) {
+ reportParseError(ExprLoc, "expected save register or stack offset");
Parser.eatToEndOfStatement();
return false;
}
+
+ Save = OffsetVal;
+ SaveIsReg = false;
} else {
MipsOperand &SaveOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
if (!SaveOpnd.isGPRAsmReg()) {
@@ -4462,11 +5433,20 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
}
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
+ CpSaveLocation = Save;
+ CpSaveLocationIsRegister = SaveIsReg;
+
getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, Ref->getSymbol(),
SaveIsReg);
return false;
}
+bool MipsAsmParser::parseDirectiveCPReturn() {
+ getTargetStreamer().emitDirectiveCpreturn(CpSaveLocation,
+ CpSaveLocationIsRegister);
+ return false;
+}
+
bool MipsAsmParser::parseDirectiveNaN() {
MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -4655,6 +5635,9 @@ bool MipsAsmParser::parseDirectiveOption() {
StringRef Option = Tok.getIdentifier();
if (Option == "pic0") {
+ // MipsAsmParser needs to know if the current PIC mode changes.
+ IsPicEnabled = false;
+
getTargetStreamer().emitDirectiveOptionPic0();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
@@ -4666,6 +5649,9 @@ bool MipsAsmParser::parseDirectiveOption() {
}
if (Option == "pic2") {
+ // MipsAsmParser needs to know if the current PIC mode changes.
+ IsPicEnabled = true;
+
getTargetStreamer().emitDirectiveOptionPic2();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
@@ -4924,6 +5910,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".cpload")
return parseDirectiveCpLoad(DirectiveID.getLoc());
+ if (IDVal == ".cprestore")
+ return parseDirectiveCpRestore(DirectiveID.getLoc());
if (IDVal == ".dword") {
parseDataDirective(8, DirectiveID.getLoc());
return false;
@@ -4974,6 +5962,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
getTargetStreamer().emitDirectiveEnt(*Sym);
CurrentFn = Sym;
+ IsCpRestoreSet = false;
return false;
}
@@ -5002,6 +5991,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
getTargetStreamer().emitDirectiveEnd(SymbolName);
CurrentFn = nullptr;
+ IsCpRestoreSet = false;
return false;
}
@@ -5073,6 +6063,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
getTargetStreamer().emitFrame(StackReg, FrameSizeVal,
ReturnRegOpnd.getGPR32Reg());
+ IsCpRestoreSet = false;
return false;
}
@@ -5173,6 +6164,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".cpsetup")
return parseDirectiveCPSetup();
+ if (IDVal == ".cpreturn")
+ return parseDirectiveCPReturn();
+
if (IDVal == ".module")
return parseDirectiveModule();
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index a34ba3b..3c1a771 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -229,6 +229,13 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+// DecodeBranchTarget26MM - Decode microMIPS branch offset, which is
+// shifted left by 1 bit.
+static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
// DecodeJumpTargetMM - Decode microMIPS jump target, which is
// shifted left by 1 bit.
static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
@@ -241,17 +248,42 @@ static DecodeStatus DecodeMem(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMemEVA(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadByte9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeLoadByte15(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeCacheOp(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
+static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodePrefeOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
@@ -261,6 +293,11 @@ static DecodeStatus DecodeSyncI(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSynciR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -284,6 +321,11 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeMemMMImm9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -330,6 +372,11 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSimm4(MCInst &Inst,
unsigned Value,
uint64_t Address,
@@ -340,23 +387,15 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
uint64_t Address,
const void *Decoder);
-// Decode the immediate field of an LSA instruction which
-// is off by one.
-static DecodeStatus DecodeLSAImm(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
+template <unsigned Bits, int Offset>
+static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeInsSize(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeExtSize(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder);
-
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -830,9 +869,24 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
if (IsMicroMips) {
Result = readInstruction16(Bytes, Address, Size, Insn, IsBigEndian);
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
+
+ if (hasMips32r6()) {
+ DEBUG(dbgs() << "Trying MicroMipsR616 table (16-bit instructions):\n");
+ // Calling the auto-generated decoder function for microMIPS32R6
+ // (and microMIPS64R6) 16-bit instructions.
+ Result = decodeInstruction(DecoderTableMicroMipsR616, Instr, Insn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 2;
+ return Result;
+ }
+ }
DEBUG(dbgs() << "Trying MicroMips16 table (16-bit instructions):\n");
- // Calling the auto-generated decoder function.
+ // Calling the auto-generated decoder function for microMIPS 16-bit
+ // instructions.
Result = decodeInstruction(DecoderTableMicroMips16, Instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail) {
@@ -847,24 +901,33 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
if (hasMips32r6()) {
DEBUG(dbgs() << "Trying MicroMips32r632 table (32-bit instructions):\n");
// Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMips32r632, Instr, Insn, Address,
- this, STI);
- } else {
- DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n");
- // Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
+ Result = decodeInstruction(DecoderTableMicroMipsR632, Instr, Insn, Address,
this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
}
+
+ DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n");
+ // Calling the auto-generated decoder function.
+ Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
Size = 4;
return Result;
}
+ // This is an invalid instruction. Let the disassembler move forward by the
+ // minimum instruction size.
+ Size = 2;
return MCDisassembler::Fail;
}
Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
- if (Result == MCDisassembler::Fail)
+ if (Result == MCDisassembler::Fail) {
+ Size = 4;
return MCDisassembler::Fail;
+ }
if (hasCOP3()) {
DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
@@ -925,6 +988,7 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return Result;
}
+ Size = 4;
return MCDisassembler::Fail;
}
@@ -1079,10 +1143,66 @@ static DecodeStatus DecodeMem(MCInst &Inst,
Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
- if(Inst.getOpcode() == Mips::SC ||
- Inst.getOpcode() == Mips::SCD){
+ if (Inst.getOpcode() == Mips::SC ||
+ Inst.getOpcode() == Mips::SCD)
Inst.addOperand(MCOperand::createReg(Reg));
- }
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeMemEVA(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn >> 7);
+ unsigned Reg = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ if (Inst.getOpcode() == Mips::SCE)
+ Inst.addOperand(MCOperand::createReg(Reg));
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLoadByte9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
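Several of the new EVA/9-bit decoders above rely on SignExtend32<9> to turn the raw field back into a signed offset. A portable stand-in, just to make the bit manipulation concrete (the template below is an illustration, not LLVM's MathExtras implementation):

#include <cassert>
#include <cstdint>

// Interpret the low B bits of V as a signed two's-complement value.
template <unsigned B>
static int32_t signExtend32(uint32_t V) {
  V &= (1u << B) - 1;
  if (V & (1u << (B - 1)))
    return int32_t(V) - int32_t(1u << B);
  return int32_t(V);
}

int main() {
  // A 9-bit field: 0x1FF is -1, 0x100 is -256, values with bit 8 clear are
  // returned unchanged.
  assert(signExtend32<9>(0x1FF) == -1);
  assert(signExtend32<9>(0x100) == -256);
  assert(signExtend32<9>(0x0FF) == 255);
  return 0;
}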
+static DecodeStatus DecodeLoadByte15(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<16>(Insn & 0xffff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
Inst.addOperand(MCOperand::createReg(Reg));
Inst.addOperand(MCOperand::createReg(Base));
@@ -1125,11 +1245,28 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
+static DecodeStatus DecodePrefeOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
- int Offset = fieldFromInstruction(Insn, 7, 9);
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+ unsigned Hint = fieldFromInstruction(Insn, 21, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ Inst.addOperand(MCOperand::createImm(Hint));
+
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn >> 7);
unsigned Hint = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
@@ -1142,6 +1279,24 @@ static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSyncI(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1157,6 +1312,21 @@ static DecodeStatus DecodeSyncI(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeSynciR6(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Immediate = SignExtend32<16>(Insn & 0xffff);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Immediate));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10));
@@ -1220,8 +1390,11 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
return MCDisassembler::Fail;
break;
case Mips::SB16_MM:
+ case Mips::SB16_MMR6:
case Mips::SH16_MM:
+ case Mips::SH16_MMR6:
case Mips::SW16_MM:
+ case Mips::SW16_MMR6:
if (DecodeGPRMM16ZeroRegisterClass(Inst, Reg, Address, Decoder)
== MCDisassembler::Fail)
return MCDisassembler::Fail;
@@ -1240,14 +1413,17 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst,
Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::SB16_MM:
+ case Mips::SB16_MMR6:
Inst.addOperand(MCOperand::createImm(Offset));
break;
case Mips::LHU16_MM:
case Mips::SH16_MM:
+ case Mips::SH16_MMR6:
Inst.addOperand(MCOperand::createImm(Offset << 1));
break;
case Mips::LW16_MM:
case Mips::SW16_MM:
+ case Mips::SW16_MMR6:
Inst.addOperand(MCOperand::createImm(Offset << 2));
break;
}
@@ -1291,7 +1467,16 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
- int Offset = SignExtend32<4>(Insn & 0xf);
+ int Offset;
+ switch (Inst.getOpcode()) {
+ case Mips::LWM16_MMR6:
+ case Mips::SWM16_MMR6:
+ Offset = fieldFromInstruction(Insn, 4, 4);
+ break;
+ default:
+ Offset = SignExtend32<4>(Insn & 0xf);
+ break;
+ }
if (DecodeRegListOperand16(Inst, Insn, Address, Decoder)
== MCDisassembler::Fail)
@@ -1303,6 +1488,27 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeMemMMImm9(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int Offset = SignExtend32<9>(Insn & 0x1ff);
+ unsigned Reg = fieldFromInstruction(Insn, 21, 5);
+ unsigned Base = fieldFromInstruction(Insn, 16, 5);
+
+ Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
+ if (Inst.getOpcode() == Mips::SCE_MM)
+ Inst.addOperand(MCOperand::createReg(Reg));
+
+ Inst.addOperand(MCOperand::createReg(Reg));
+ Inst.addOperand(MCOperand::createReg(Base));
+ Inst.addOperand(MCOperand::createImm(Offset));
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1659,6 +1865,16 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ int32_t BranchOffset = SignExtend32<26>(Offset) << 1;
+
+ Inst.addOperand(MCOperand::createImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeJumpTargetMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -1700,6 +1916,14 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst,
+ unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::createImm(Value == 0x0 ? 8 : Value));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeSimm4(MCInst &Inst,
unsigned Value,
uint64_t Address,
@@ -1716,12 +1940,12 @@ static DecodeStatus DecodeSimm16(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeLSAImm(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- // We add one to the immediate field as it was encoded as 'imm - 1'.
- Inst.addOperand(MCOperand::createImm(Insn + 1));
+template <unsigned Bits, int Offset>
+static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value,
+ uint64_t Address,
+ const void *Decoder) {
+ Value &= ((1 << Bits) - 1);
+ Inst.addOperand(MCOperand::createImm(Value + Offset));
return MCDisassembler::Success;
}
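DecodeUImmWithOffset generalises the old DecodeLSAImm: mask the raw field to Bits bits, then add a fixed Offset. For LSA, whose field stores 'shift - 1', the instantiation would be DecodeUImmWithOffset<2, 1>; the TableGen hookup is not part of this hunk, so treat that instantiation as an assumption. A compile-and-run sketch:

#include <cassert>

// Mirrors the decode step: keep the low Bits bits, then add Offset.
template <unsigned Bits, int Offset>
static int decodeUImmWithOffset(unsigned Value) {
  Value &= (1u << Bits) - 1;
  return int(Value) + Offset;
}

int main() {
  // A 2-bit field encoding 'shift - 1' decodes raw values 0..3 to 1..4.
  for (unsigned Field = 0; Field < 4; ++Field)
    assert((decodeUImmWithOffset<2, 1>(Field)) == int(Field) + 1);
  return 0;
}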
@@ -1736,15 +1960,6 @@ static DecodeStatus DecodeInsSize(MCInst &Inst,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeExtSize(MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- int Size = (int) Insn + 1;
- Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size)));
- return MCDisassembler::Success;
-}
-
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::createImm(SignExtend32<19>(Insn) * 4));
@@ -1792,15 +2007,21 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst,
uint64_t Address,
const void *Decoder) {
unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
- Mips::S6, Mips::FP};
+ Mips::S6, Mips::S7, Mips::FP};
unsigned RegNum;
unsigned RegLst = fieldFromInstruction(Insn, 21, 5);
+
// Empty register lists are not allowed.
if (RegLst == 0)
return MCDisassembler::Fail;
RegNum = RegLst & 0xf;
+
+ // RegLst values 10-15 and 26-31 are reserved.
+ if (RegNum > 9)
+ return MCDisassembler::Fail;
+
for (unsigned i = 0; i < RegNum; i++)
Inst.addOperand(MCOperand::createReg(Regs[i]));
@@ -1814,7 +2035,16 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder) {
unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3};
- unsigned RegLst = fieldFromInstruction(Insn, 4, 2);
+ unsigned RegLst;
+ switch (Inst.getOpcode()) {
+ default:
+ RegLst = fieldFromInstruction(Insn, 4, 2);
+ break;
+ case Mips::LWM16_MMR6:
+ case Mips::SWM16_MMR6:
+ RegLst = fieldFromInstruction(Insn, 8, 2);
+ break;
+ }
unsigned RegNum = RegLst & 0x3;
for (unsigned i = 0; i <= RegNum; i++)
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index a5637b1..a7b7d2e 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -235,7 +235,9 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
case Mips::SWM32_MM:
case Mips::LWM32_MM:
case Mips::SWM16_MM:
+ case Mips::SWM16_MMR6:
case Mips::LWM16_MM:
+ case Mips::LWM16_MMR6:
opNum = MI->getNumOperands() - 2;
break;
}
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 713f35c..0e61ea6 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -73,8 +73,6 @@ enum CondCode {
const char *MipsFCCToString(Mips::CondCode CC);
} // end namespace Mips
-class TargetMachine;
-
class MipsInstPrinter : public MCInstPrinter {
public:
MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 8e6c9e6..cdcc392 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -23,7 +23,7 @@ static const MCPhysReg Mips64IntRegs[8] = {
Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
}
-const ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
+ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
if (IsO32())
return makeArrayRef(O32IntRegs);
if (IsN32() || IsN64())
@@ -31,7 +31,7 @@ const ArrayRef<MCPhysReg> MipsABIInfo::GetByValArgRegs() const {
llvm_unreachable("Unhandled ABI");
}
-const ArrayRef<MCPhysReg> MipsABIInfo::GetVarArgRegs() const {
+ArrayRef<MCPhysReg> MipsABIInfo::GetVarArgRegs() const {
if (IsO32())
return makeArrayRef(O32IntRegs);
if (IsN32() || IsN64())
@@ -78,7 +78,6 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
.Case("mips32r3", MipsABIInfo::O32())
.Case("mips32r5", MipsABIInfo::O32())
.Case("mips32r6", MipsABIInfo::O32())
- .Case("mips16", MipsABIInfo::O32())
.Case("mips3", MipsABIInfo::N64())
.Case("mips4", MipsABIInfo::N64())
.Case("mips5", MipsABIInfo::N64())
@@ -107,6 +106,10 @@ unsigned MipsABIInfo::GetNullPtr() const {
return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO;
}
+unsigned MipsABIInfo::GetZeroReg() const {
+ return AreGprs64bit() ? Mips::ZERO_64 : Mips::ZERO;
+}
+
unsigned MipsABIInfo::GetPtrAdduOp() const {
return ArePtrs64bit() ? Mips::DADDu : Mips::ADDu;
}
@@ -115,6 +118,10 @@ unsigned MipsABIInfo::GetPtrAddiuOp() const {
return ArePtrs64bit() ? Mips::DADDiu : Mips::ADDiu;
}
+unsigned MipsABIInfo::GetGPRMoveOp() const {
+ return ArePtrs64bit() ? Mips::OR64 : Mips::OR;
+}
+
unsigned MipsABIInfo::GetEhDataReg(unsigned I) const {
static const unsigned EhDataReg[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index 40c5681..ffa2c76 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -47,10 +47,10 @@ public:
ABI GetEnumValue() const { return ThisABI; }
/// The registers to use for byval arguments.
- const ArrayRef<MCPhysReg> GetByValArgRegs() const;
+ ArrayRef<MCPhysReg> GetByValArgRegs() const;
/// The registers to use for the variable argument list.
- const ArrayRef<MCPhysReg> GetVarArgRegs() const;
+ ArrayRef<MCPhysReg> GetVarArgRegs() const;
/// Obtain the size of the area allocated by the callee for arguments.
/// CallingConv::FastCall affects the value for O32.
@@ -67,9 +67,12 @@ public:
unsigned GetFramePtr() const;
unsigned GetBasePtr() const;
unsigned GetNullPtr() const;
+ unsigned GetZeroReg() const;
unsigned GetPtrAdduOp() const;
unsigned GetPtrAddiuOp() const;
+ unsigned GetGPRMoveOp() const;
inline bool ArePtrs64bit() const { return IsN64(); }
+ inline bool AreGprs64bit() const { return IsN32() || IsN64(); }
unsigned GetEhDataReg(unsigned I) const;
};
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 328e717..e4865e2 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -63,15 +63,19 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// address range. Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isIntN(16, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC19_S2:
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 19-bit signed immediate.
- if (!isIntN(19, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC19 fixup");
+ if (!isInt<19>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC19 fixup");
+ return 0;
+ }
break;
case Mips::fixup_Mips_26:
// So far we are only using this type for jumps.
@@ -104,45 +108,57 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 7-bit signed immediate.
- if (!isIntN(7, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC7 fixup");
+ if (!isInt<7>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC7 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MICROMIPS_PC10_S1:
Value -= 2;
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 10-bit signed immediate.
- if (!isIntN(10, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC10 fixup");
+ if (!isInt<10>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC10 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MICROMIPS_PC16_S1:
Value -= 4;
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isIntN(16, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC18_S3:
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 8;
// We now check if Value can be encoded as an 18-bit signed immediate.
- if (!isIntN(18, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup");
+ if (!isInt<18>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC21_S2:
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 21-bit signed immediate.
- if (!isIntN(21, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup");
+ if (!isInt<21>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup");
+ return 0;
+ }
break;
case Mips::fixup_MIPS_PC26_S2:
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 26-bit signed immediate.
- if (!isIntN(26, Value) && Ctx)
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
+ if (!isInt<26>(Value) && Ctx) {
+ Ctx->reportError(Fixup.getLoc(), "out of range PC26 fixup");
+ return 0;
+ }
break;
}
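The PC-relative cases above all follow the same shape: scale the byte distance by the instruction's implicit shift, then range-check against the signed field width and report an error instead of aborting. A rough standalone model of the fixup_MIPS_PC26_S2 arithmetic (the surrounding relocation bookkeeping is omitted):

#include <cassert>
#include <cstdint>

// Does V fit in a Bits-bit signed two's-complement field?
static bool fitsSigned(int64_t V, unsigned Bits) {
  int64_t Min = -(int64_t(1) << (Bits - 1));
  int64_t Max = (int64_t(1) << (Bits - 1)) - 1;
  return V >= Min && V <= Max;
}

int main() {
  // fixup_MIPS_PC26_S2: the byte distance is divided by 4 (the low two bits
  // are implied zero) and the result must fit a signed 26-bit field.
  int64_t Distance = -0x200;           // branch target minus anchor, in bytes
  int64_t Scaled = Distance / 4;
  assert(fitsSigned(Scaled, 26));      // else: "out of range PC26 fixup"
  assert(!fitsSigned(int64_t(1) << 30, 26));
  return 0;
}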
@@ -232,6 +248,18 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
}
+bool MipsAsmBackend::getFixupKind(StringRef Name, MCFixupKind &MappedKind) const {
+ if (Name == "R_MIPS_NONE") {
+ MappedKind = (MCFixupKind)Mips::fixup_Mips_NONE;
+ return true;
+ }
+ if (Name == "R_MIPS_32") {
+ MappedKind = FK_Data_4;
+ return true;
+ }
+ return MCAsmBackend::getFixupKind(Name, MappedKind);
+}
+
const MCFixupKindInfo &MipsAsmBackend::
getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo LittleEndianInfos[Mips::NumTargetFixupKinds] = {
@@ -239,6 +267,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
// MipsFixupKinds.h.
//
// name offset bits flags
+ { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 0, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
@@ -304,6 +333,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
// MipsFixupKinds.h.
//
// name offset bits flags
+ { "fixup_Mips_NONE", 0, 0, 0 },
{ "fixup_Mips_16", 16, 16, 0 },
{ "fixup_Mips_32", 0, 32, 0 },
{ "fixup_Mips_REL32", 0, 32, 0 },
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index b3d5a49..1c9af92 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -41,6 +41,7 @@ public:
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
+ bool getFixupKind(StringRef Name, MCFixupKind &MappedKind) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
unsigned getNumFixupKinds() const override {
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 9b29527..5b9f02b 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -68,6 +68,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
unsigned Kind = (unsigned)Fixup.getKind();
switch (Kind) {
+ case Mips::fixup_Mips_NONE:
+ return ELF::R_MIPS_NONE;
case Mips::fixup_Mips_16:
case FK_Data_2:
return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16;
@@ -325,13 +327,24 @@ static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) {
// matching LO;
// - prefer LOs without a pair;
// - prefer LOs with higher offset;
+
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
+ const ELFRelocationEntry &A = *AP;
+ const ELFRelocationEntry &B = *BP;
+ if (A.Offset != B.Offset)
+ return B.Offset - A.Offset;
+ if (B.Type != A.Type)
+ return A.Type - B.Type;
+ return 0;
+}
+
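cmpRel reproduces the ordering the default sortRelocs gave (offsets descending, ties broken by relocation type) in the tri-state form array_pod_sort expects. The same ordering expressed with std::sort and a boolean predicate, which also sidesteps narrowing the unsigned offset difference into an int, would look roughly like this (illustration only):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

struct Rel { uint64_t Offset; unsigned Type; };

int main() {
  // Offsets descending, equal offsets ordered by ascending type.
  std::vector<Rel> Relocs = {{8, 5}, {16, 4}, {8, 2}, {16, 9}};
  std::sort(Relocs.begin(), Relocs.end(), [](const Rel &A, const Rel &B) {
    if (A.Offset != B.Offset)
      return A.Offset > B.Offset;
    return A.Type < B.Type;
  });
  assert(Relocs[0].Offset == 16 && Relocs[0].Type == 4);
  assert(Relocs[1].Offset == 16 && Relocs[1].Type == 9);
  assert(Relocs[2].Offset == 8 && Relocs[2].Type == 2);
  assert(Relocs[3].Offset == 8 && Relocs[3].Type == 5);
  return 0;
}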
void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
if (Relocs.size() < 2)
return;
- // The default function sorts entries by Offset in descending order.
- MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+ // Sorts entries by Offset in descending order.
+ array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
// Init MipsRelocs from Relocs.
std::vector<MipsRelocationEntry> MipsRelocs;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index b45d9cf..e7d687e 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -63,7 +63,7 @@ void MipsELFStreamer::SwitchSection(MCSection *Section,
}
void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) {
+ SMLoc Loc) {
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
Labels.clear();
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index af9311f..a241cde 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -60,8 +60,7 @@ public:
/// Overriding this function allows us to dismiss all labels that are
/// candidates for marking as microMIPS when .word directive is emitted.
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override;
+ void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index e601963..3652f4b 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -23,8 +23,11 @@ namespace Mips {
// in MipsAsmBackend.cpp.
//
enum Fixups {
+ // Branch fixups resulting in R_MIPS_NONE.
+ fixup_Mips_NONE = FirstTargetFixupKind,
+
// Branch fixups resulting in R_MIPS_16.
- fixup_Mips_16 = FirstTargetFixupKind,
+ fixup_Mips_16,
// Pure 32 bit data fixup resulting in - R_MIPS_32.
fixup_Mips_32,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 5d23fcb..d4ccf03 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -17,13 +17,14 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
+class Triple;
- class MipsMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit MipsMCAsmInfo(const Triple &TheTriple);
- };
+class MipsMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit MipsMCAsmInfo(const Triple &TheTriple);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index e36263d..4b030eb 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -190,6 +190,10 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
else
NewOpcode = Mips::Std2MicroMips(Opcode, Mips::Arch_micromips);
+ // Check whether it is a DSP instruction.
+ if (NewOpcode == -1)
+ NewOpcode = Mips::Dsp2MicroMips(Opcode, Mips::Arch_mmdsp);
+
if (NewOpcode != -1) {
if (Fixups.size() > N)
Fixups.pop_back();
@@ -346,6 +350,23 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
+/// getBranchTarget26OpValueMM - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::getBranchTarget26OpValueMM(
+ const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 2.
+ if (MO.isImm())
+ return MO.getImm() >> 1;
+
+ // TODO: Push a 26-bit PC-relative fixup.
+ return 0;
+}
+
/// getJumpOffset16OpValue - Return binary encoding of the jump
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -745,7 +766,8 @@ getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo,
const MCSubtargetInfo &STI) const {
// Register is encoded in bits 9-5, offset is encoded in bits 4-0.
assert(MI.getOperand(OpNo).isReg() &&
- MI.getOperand(OpNo).getReg() == Mips::SP &&
+ (MI.getOperand(OpNo).getReg() == Mips::SP ||
+ MI.getOperand(OpNo).getReg() == Mips::SP_64) &&
"Unexpected base register!");
unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1),
Fixups, STI) >> 2;
@@ -769,6 +791,19 @@ getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 8-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups,
+ STI) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI);
+
+ return (OffBits & 0x1FF) | RegBits;
+}
+
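getMemEncodingMMImm9 packs the base register's encoding into bits 20-16 and keeps only the low nine bits of the signed offset, mirroring what DecodeMemMMImm9 undoes on the disassembler side. A tiny round-trip check in plain C++ ($sp encodes as register 29; the values are only an example):

#include <cassert>
#include <cstdint>

int main() {
  unsigned BaseRegNo = 29;   // $sp
  int32_t Offset = -4;
  // Encoder side: base in bits 20-16, offset truncated to 9 bits.
  uint32_t Encoded = (uint32_t(BaseRegNo) << 16) | (uint32_t(Offset) & 0x1FF);
  assert(((Encoded >> 16) & 0x1F) == BaseRegNo);
  // Decoder side: sign-extend the 9-bit field back to the original offset.
  int32_t Back = int32_t(Encoded & 0x1FF);
  if (Back & 0x100)
    Back -= 0x200;
  assert(Back == Offset);
  return 0;
}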
+unsigned MipsMCCodeEmitter::
getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -792,6 +827,19 @@ getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
}
unsigned MipsMCCodeEmitter::
+getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups,
+ STI) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI);
+
+ return (OffBits & 0xFFFF) | RegBits;
+}
+
+unsigned MipsMCCodeEmitter::
getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -801,7 +849,9 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
default:
break;
case Mips::SWM16_MM:
+ case Mips::SWM16_MMR6:
case Mips::LWM16_MM:
+ case Mips::LWM16_MMR6:
OpNo = MI.getNumOperands() - 2;
break;
}
@@ -815,15 +865,6 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
return ((OffBits >> 2) & 0x0F);
}
-unsigned
-MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- assert(MI.getOperand(OpNo).isImm());
- unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
- return SizeEncoding - 1;
-}
-
// FIXME: should be called getMSBEncoding
//
unsigned
@@ -838,13 +879,15 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
return Position + Size - 1;
}
+template <unsigned Bits, int Offset>
unsigned
-MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+MipsMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
assert(MI.getOperand(OpNo).isImm());
- // The immediate is encoded as 'immediate - 1'.
- return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) - 1;
+ unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
+ Value -= Offset;
+ return Value;
}
unsigned
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 911cc2f..fdacd17 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -137,6 +137,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // getBranchTarget26OpValueMM - Return binary encoding of the branch
+ // offset operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTarget26OpValueMM(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getJumpOffset16OpValue - Return binary encoding of the jump
// offset operand. If the machine operand requires relocation,
// record the relocation and return zero.
@@ -172,23 +179,27 @@ public:
unsigned getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- // getLSAImmEncoding - Return binary encoding of LSA immediate.
- unsigned getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// Subtract Offset, then encode as an N-bit unsigned integer.
+ template <unsigned Bits, int Offset>
+ unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index fd2ed17..e889972 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -51,8 +51,8 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
// There are no TLS MipsMCExprs at the moment.
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index e4da2df..e5fa755 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -89,9 +89,15 @@ void MipsTargetStreamer::emitDirectiveSetHardFloat() {
void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); }
void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveCpRestore(
+ SmallVector<MCInst, 3> &StoreInsts, int Offset) {
+ forbidModuleDirective();
+}
void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) {
}
+void MipsTargetStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) {}
void MipsTargetStreamer::emitDirectiveModuleFP() {}
@@ -358,6 +364,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) {
forbidModuleDirective();
}
+void MipsTargetAsmStreamer::emitDirectiveCpRestore(
+ SmallVector<MCInst, 3> &StoreInsts, int Offset) {
+ MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset);
+ OS << "\t.cprestore\t" << Offset << "\n";
+}
+
void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
int RegOrOffset,
const MCSymbol &Sym,
@@ -373,7 +385,13 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
OS << ", ";
- OS << Sym.getName() << "\n";
+ OS << Sym.getName();
+ forbidModuleDirective();
+}
+
+void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) {
+ OS << "\t.cpreturn";
forbidModuleDirective();
}
@@ -595,8 +613,9 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHT_REL);
+ MCSymbol *Sym = Context.getOrCreateSymbol(Name);
const MCSymbolRefExpr *ExprRef =
- MCSymbolRefExpr::create(Name, MCSymbolRefExpr::VK_None, Context);
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context);
MCA.registerSection(*Sec);
Sec->setAlignment(4);
@@ -622,10 +641,25 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
OS.PopSection();
+
+  // .end also implicitly sets the size of the function symbol.
+ MCSymbol *CurPCSym = Context.createTempSymbol();
+ OS.EmitLabel(CurPCSym);
+ const MCExpr *Size = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context),
+ ExprRef, Context);
+ int64_t AbsSize;
+ if (!Size->evaluateAsAbsolute(AbsSize, MCA))
+ llvm_unreachable("Function size must be evaluatable as absolute");
+ Size = MCConstantExpr::create(AbsSize, Context);
+ static_cast<MCSymbolELF *>(Sym)->setSize(Size);
}
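
The block just added to emitDirectiveEnd derives the function's size with the
usual MC label-difference idiom: drop a temporary label at the current
position, subtract the .ent symbol, and record the result as the ELF symbol
size (the patch additionally folds the expression to an MCConstantExpr because
it is known to be absolute at this point). A condensed sketch of the same
idiom, assuming an MCStreamer OS, an MCContext Context and the function symbol
Sym are in scope:

    // Label-difference size computation (fragment; mirrors the calls above).
    MCSymbol *End = Context.createTempSymbol();
    OS.EmitLabel(End);
    const MCExpr *Size = MCBinaryExpr::createSub(
        MCSymbolRefExpr::create(End, MCSymbolRefExpr::VK_None, Context),
        MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context),
        Context);
    static_cast<MCSymbolELF *>(Sym)->setSize(Size);
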
void MipsTargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {
GPRInfoSet = FPRInfoSet = FrameInfoSet = false;
+
+ // .ent also acts like an implicit '.type symbol, STT_FUNC'
+ static_cast<const MCSymbolELF &>(Symbol).setType(ELF::STT_FUNC);
}
void MipsTargetELFStreamer::emitDirectiveAbiCalls() {
@@ -752,6 +786,24 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) {
forbidModuleDirective();
}
+void MipsTargetELFStreamer::emitDirectiveCpRestore(
+ SmallVector<MCInst, 3> &StoreInsts, int Offset) {
+ MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset);
+ // .cprestore offset
+ // When PIC mode is enabled and the O32 ABI is used, this directive expands
+ // to:
+ // sw $gp, offset($sp)
+ // and adds a corresponding LW after every JAL.
+
+ // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it
+ // is used in non-PIC mode.
+ if (!Pic || (getABI().IsN32() || getABI().IsN64()))
+ return;
+
+ for (const MCInst &Inst : StoreInsts)
+ getStreamer().EmitInstruction(Inst, STI);
+}
+
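
For O32 PIC code, .cprestore expands to a $gp spill at the given stack offset;
the ELF streamer above only replays the MCInsts that the caller already
assembled into StoreInsts. How the caller (the asm parser) builds that list is
not part of this hunk, so the following is a hedged sketch using opcode and
register enums that do exist in the Mips backend (Mips::SW, Mips::GP,
Mips::SP):

    // Hypothetical caller-side construction of 'sw $gp, Offset($sp)' before
    // invoking emitDirectiveCpRestore(); TS is assumed to be a
    // MipsTargetStreamer reference and Offset the directive's operand.
    SmallVector<MCInst, 3> StoreInsts;
    MCInst Store;
    Store.setOpcode(Mips::SW);
    Store.addOperand(MCOperand::createReg(Mips::GP));
    Store.addOperand(MCOperand::createReg(Mips::SP));
    Store.addOperand(MCOperand::createImm(Offset));
    StoreInsts.push_back(Store);
    TS.emitDirectiveCpRestore(StoreInsts, Offset);
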
void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
int RegOrOffset,
const MCSymbol &Sym,
@@ -766,7 +818,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
// Either store the old $gp in a register or on the stack
if (IsReg) {
// move $save, $gpreg
- Inst.setOpcode(Mips::DADDu);
+ Inst.setOpcode(Mips::OR64);
Inst.addOperand(MCOperand::createReg(RegOrOffset));
Inst.addOperand(MCOperand::createReg(Mips::GP));
Inst.addOperand(MCOperand::createReg(Mips::ZERO));
@@ -810,6 +862,30 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
forbidModuleDirective();
}
+void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) {
+  // Only the N32 and N64 ABIs emit anything for .cpreturn, and only when PIC
+  // is enabled.
+ if (!Pic || !(getABI().IsN32() || getABI().IsN64()))
+ return;
+
+ MCInst Inst;
+  // Restore the old $gp either from a register or from the stack
+ if (SaveLocationIsRegister) {
+ Inst.setOpcode(Mips::OR);
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(SaveLocation));
+ Inst.addOperand(MCOperand::createReg(Mips::ZERO));
+ } else {
+ Inst.setOpcode(Mips::LD);
+ Inst.addOperand(MCOperand::createReg(Mips::GP));
+ Inst.addOperand(MCOperand::createReg(Mips::SP));
+ Inst.addOperand(MCOperand::createImm(SaveLocation));
+ }
+ getStreamer().EmitInstruction(Inst, STI);
+
+ forbidModuleDirective();
+}
+
void MipsTargetELFStreamer::emitMipsAbiFlags() {
MCAssembler &MCA = getStreamer().getAssembler();
MCContext &Context = MCA.getContext();
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 187a022..400f6eef 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -16,6 +16,64 @@ class MMR6Arch<string opstr> {
string BaseOpcode = opstr;
}
+// Class used for microMIPS32r6 and microMIPS64r6 instructions.
+class MicroMipsR6Inst16 : PredicateControl {
+ string DecoderNamespace = "MicroMipsR6";
+ let InsnPredicates = [HasMicroMips32r6];
+}
+
+class BC16_FM_MM16R6 {
+ bits<10> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x33;
+ let Inst{9-0} = offset;
+}
+
+class BEQZC_BNEZC_FM_MM16R6<bits<6> op> : MicroMipsR6Inst16 {
+ bits<3> rs;
+ bits<7> offset;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = op;
+ let Inst{9-7} = rs;
+ let Inst{6-0} = offset;
+}
+
+class POOL16C_JALRC_FM_MM16R6<bits<5> op> {
+ bits<5> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-5} = rs;
+ let Inst{4-0} = op;
+}
+
+class POOL16C_JRCADDIUSP_FM_MM16R6<bits<5> op> {
+ bits<5> imm;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-5} = imm;
+ let Inst{4-0} = op;
+}
+
+class POOL16C_LWM_SWM_FM_MM16R6<bits<4> funct> {
+ bits<2> rt;
+ bits<4> addr;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-8} = rt;
+ let Inst{7-4} = addr;
+ let Inst{3-0} = funct;
+}
+
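
These 16-bit format classes only declare where each operand's encoding bits
land; the TableGen-generated emitter does the actual shifting and masking.
Reading POOL16C_LWM_SWM_FM_MM16R6 as plain bit packing gives the following
standalone illustration (not the generated code):

    #include <cstdint>

    // POOL16C LWM16/SWM16 layout: Inst{15-10}=0x11, {9-8}=rt, {7-4}=addr,
    // {3-0}=funct.
    uint16_t packPool16CLwmSwm(unsigned Rt, unsigned Addr, unsigned Funct) {
      return static_cast<uint16_t>((0x11u << 10) | ((Rt & 0x3u) << 8) |
                                   ((Addr & 0xFu) << 4) | (Funct & 0xFu));
    }
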
class POOL32A_BITSWAP_FM_MMR6<bits<6> funct> : MipsR6Inst {
bits<5> rd;
bits<5> rt;
@@ -71,6 +129,64 @@ class ADDI_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
let Inst{15-0} = imm16;
}
+class POOL32C_ST_EVA_FM_MMR6<bits<6> op, bits<3> funct> : MipsR6Inst {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = hint;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0b1010;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class LB32_FM_MMR6 : MipsR6Inst {
+ bits<21> addr;
+ bits<5> rt;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000111;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class LBU32_FM_MMR6 : MipsR6Inst {
+ bits<21> addr;
+ bits<5> rt;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000101;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class POOL32C_LB_LBU_FM_MMR6<bits<3> funct> : MipsR6Inst {
+ bits<21> addr;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-12} = 0b0110;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = addr{8-0};
+}
+
class SIGN_EXTEND_FM_MMR6<string instr_asm, bits<10> funct>
: MMR6Arch<instr_asm> {
bits<5> rd;
@@ -124,6 +240,69 @@ class POOL32A_FM_MMR6<bits<10> funct> : MipsR6Inst {
let Inst{9-0} = funct;
}
+class POOL32A_PAUSE_FM_MMR6<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = op;
+ let Inst{10-6} = 0;
+ let Inst{5-0} = 0;
+}
+
+class POOL32A_RDPGPR_FM_MMR6<bits<10> funct> {
+ bits<5> rt;
+ bits<5> rd;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rd;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_RDHWR_FM_MMR6 {
+ bits<5> rt;
+ bits<5> rs;
+ bits<3> sel;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = 0;
+ let Inst{13-11} = sel;
+ let Inst{10} = 0;
+ let Inst{9-0} = 0b0111000000;
+}
+
+class POOL32A_SYNC_FM_MMR6 {
+ bits<5> stype;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = stype;
+ let Inst{15-6} = 0b0110101101;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32I_SYNCI_FM_MMR6 {
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<16> immediate = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010000;
+ let Inst{25-21} = 0b01100;
+ let Inst{20-16} = base;
+ let Inst{15-0} = immediate;
+}
+
class POOL32A_2R_FM_MMR6<bits<10> funct> : MipsR6Inst {
bits<5> rs;
bits<5> rt;
@@ -198,6 +377,78 @@ class POOL32A_LSA_FM<bits<6> funct> : MipsR6Inst {
let Inst{5-0} = funct;
}
+class SB32_SH32_STORE_FM_MMR6<bits<6> op> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class POOL32C_STORE_EVA_FM_MMR6<bits<3> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0b1010;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_EVA_FM_MMR6<bits<3> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0b0110;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_FM_MMR6 {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b111111;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-0} = offset;
+}
+
+class LOAD_UPPER_IMM_FM_MMR6 {
+ bits<5> rt;
+ bits<16> imm16;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000100;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = 0;
+ let Inst{15-0} = imm16;
+}
+
class CMP_BRANCH_1R_RT_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
bits<5> rt;
bits<16> offset;
@@ -222,12 +473,13 @@ class CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
let Inst{15-0} = offset;
}
-class ERET_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
+class POOL32A_ERET_FM_MMR6<string instr_asm, bits<10> funct>
+ : MMR6Arch<instr_asm> {
bits<32> Inst;
let Inst{31-26} = 0x00;
let Inst{25-16} = 0x00;
- let Inst{15-6} = 0x3cd;
+ let Inst{15-6} = funct;
let Inst{5-0} = 0x3c;
}
@@ -262,7 +514,8 @@ class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
let Inst{5-0} = 0x0;
}
-class EIDI_MMR6_ENC<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> {
+class POOL32A_EIDI_MMR6_ENC<string instr_asm, bits<10> funct>
+ : MMR6Arch<instr_asm> {
bits<32> Inst;
bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt
@@ -287,3 +540,323 @@ class SHIFT_MMR6_ENC<string instr_asm, bits<10> funct, bit rotate> : MMR6Arch<in
let Inst{10} = rotate;
let Inst{9-0} = funct;
}
+
+class SW32_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
+ bits<5> rt;
+ bits<21> addr;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = addr{20-16};
+ let Inst{15-0} = addr{15-0};
+}
+
+class POOL32C_SWE_FM_MMR6<string instr_asm, bits<6> op, bits<4> fmt,
+ bits<3> funct> : MMR6Arch<instr_asm> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = fmt;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class POOL32F_ARITH_FM_MMR6<string instr_asm, bits<2> fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10} = 0;
+ let Inst{9-8} = fmt;
+ let Inst{7-0} = funct;
+}
+
+class POOL32F_ARITHF_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
+
+class POOL32F_MOV_NEG_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14-13} = fmt;
+ let Inst{12-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MINMAX_FM<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
+
+class POOL32F_CMP_FM<string instr_asm, bits<6> format, FIELD_CMP_COND Cond>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-6} = Cond.Value;
+ let Inst{5-0} = format;
+}
+
+class POOL32F_CVT_LW_FM<string instr_asm, bit fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14} = fmt;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_CVT_DS_FM<string instr_asm, bits<2> fmt, bits<7> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14-13} = fmt;
+ let Inst{12-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_ABS_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14-13} = fmt;
+ let Inst{12-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MATH_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14} = fmt;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL16A_ADDU16_FM_MMR6 : MicroMipsR6Inst16 {
+ bits<3> rs;
+ bits<3> rt;
+ bits<3> rd;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b000001;
+ let Inst{9-7} = rs;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = rd;
+ let Inst{0} = 0;
+}
+
+class POOL16C_AND16_FM_MMR6 : MicroMipsR6Inst16 {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-7} = rt;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = 0b0001;
+}
+
+class POOL16C_NOT16_FM_MMR6 : MicroMipsR6Inst16 {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0x11;
+ let Inst{9-7} = rt;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = 0b0000;
+}
+
+class POOL16C_OR16_XOR16_FM_MMR6<bits<4> op> {
+ bits<3> rt;
+ bits<3> rs;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-7} = rt;
+ let Inst{6-4} = rs;
+ let Inst{3-0} = op;
+}
+
+class POOL16C_BREAKPOINT_FM_MMR6<bits<6> op> {
+ bits<4> code_;
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b010001;
+ let Inst{9-6} = code_;
+ let Inst{5-0} = op;
+}
+
+class POOL16A_SUBU16_FM_MMR6 {
+ bits<3> rs;
+ bits<3> rt;
+ bits<3> rd;
+
+ bits<16> Inst;
+
+ let Inst{15-10} = 0b000001;
+ let Inst{9-7} = rs;
+ let Inst{6-4} = rt;
+ let Inst{3-1} = rd;
+ let Inst{0} = 0b1;
+}
+
+class POOL32A_WRPGPR_WSBH_FM_MMR6<bits<10> funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x00;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0x3c;
+}
+
+class POOL32F_RECIP_ROUND_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15} = 0;
+ let Inst{14} = fmt;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_RINT_FM_MMR6<string instr_asm, bits<2> fmt>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = fs;
+ let Inst{20-16} = fd;
+ let Inst{15-11} = 0;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = 0b000100000;
+}
+
+class POOL32F_SEL_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = ft;
+ let Inst{20-16} = fs;
+ let Inst{15-11} = fd;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
+
+class POOL32F_CLASS_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010101;
+ let Inst{25-21} = fs;
+ let Inst{20-16} = fd;
+ let Inst{15-11} = 0b00000;
+ let Inst{10-9} = fmt;
+ let Inst{8-0} = funct;
+}
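
The POOL32F families added above all share the major opcode 0b010101 and use
the fmt field to select single versus double precision, with funct picking the
operation. As a worked example, POOL32F_ARITH_FM_MMR6 packs as follows
(standalone sketch, not the generated encoder; the add.s/add.d funct value
0b00110000 comes from FADD_S/D_MMR6_ENC in the next file of this patch):

    #include <cstdint>

    // POOL32F_ARITH_FM_MMR6: {31-26}=0x15 (0b010101), {25-21}=ft,
    // {20-16}=fs, {15-11}=fd, {10}=0, {9-8}=fmt, {7-0}=funct.
    uint32_t packPool32FArith(unsigned Ft, unsigned Fs, unsigned Fd,
                              unsigned Fmt, unsigned Funct) {
      return (0x15u << 26) | ((Ft & 0x1Fu) << 21) | ((Fs & 0x1Fu) << 16) |
             ((Fd & 0x1Fu) << 11) | ((Fmt & 0x3u) << 8) | (Funct & 0xFFu);
    }

    // add.s -> packPool32FArith(ft, fs, fd, /*fmt=*/0, /*funct=*/0x30);
    // add.d -> packPool32FArith(ft, fs, fd, /*fmt=*/1, /*funct=*/0x30);
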
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 53bde13..31b5db0 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -11,6 +11,13 @@
//
//===----------------------------------------------------------------------===//
+def brtarget26_mm : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTarget26OpValueMM";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTarget26MM";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
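
The EncoderMethod/DecoderMethod strings on brtarget26_mm are how TableGen wires
this operand to the getBranchTarget26OpValueMM hooks declared earlier in this
patch. microMIPS branch offsets are halfword-scaled, so such encoders
conventionally shift an immediate target right by one and fall back to
recording a fixup for symbolic targets; the actual body is not shown here, so
treat the relocation handling below as an assumption:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/MC/MCFixup.h"
    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    // Sketch of the usual shape of a microMIPS branch-target encoder; not the
    // patch's implementation.
    static unsigned encodeBranchTarget26MM(const MCInst &MI, unsigned OpNo,
                                           SmallVectorImpl<MCFixup> &Fixups) {
      const MCOperand &MO = MI.getOperand(OpNo);
      if (MO.isImm())
        return MO.getImm() >> 1; // offsets are in units of halfwords
      // For a symbolic operand, a 26-bit PC-relative fixup would be appended
      // to Fixups here and 0 returned until relocation resolves it.
      (void)Fixups;
      return 0;
    }
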
//===----------------------------------------------------------------------===//
//
// Instruction Encodings
@@ -28,6 +35,9 @@ class ALIGN_MMR6_ENC : POOL32A_ALIGN_FM_MMR6<0b011111>;
class AUI_MMR6_ENC : AUI_FM_MMR6;
class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>;
class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>;
+class BC16_MMR6_ENC : BC16_FM_MM16R6;
+class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>;
+class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>;
class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>;
@@ -42,13 +52,19 @@ class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>;
-class EI_MMR6_ENC : EIDI_MMR6_ENC<"ei", 0x15d>;
-class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">;
+class EI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"ei", 0x15d>;
+class DI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"di", 0b0100011101>;
+class ERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0x3cd>;
+class DERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"deret", 0b1110001101>;
class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">;
+class JALRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0xb>;
class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>;
+class JRC16_MMR6_ENC: POOL16C_JALRC_FM_MM16R6<0x3>;
+class JRCADDIUSP_MMR6_ENC : POOL16C_JRCADDIUSP_FM_MM16R6<0x13>;
class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>;
class LWPC_MMR6_ENC : PCREL19_FM_MMR6<0b01>;
+class LWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0x2>;
class MOD_MMR6_ENC : ARITH_FM_MMR6<"mod", 0x158>;
class MODU_MMR6_ENC : ARITH_FM_MMR6<"modu", 0x1d8>;
class MUL_MMR6_ENC : ARITH_FM_MMR6<"mul", 0x18>;
@@ -59,15 +75,99 @@ class NOR_MMR6_ENC : ARITH_FM_MMR6<"nor", 0x2d0>;
class OR_MMR6_ENC : ARITH_FM_MMR6<"or", 0x290>;
class ORI_MMR6_ENC : ADDI_FM_MMR6<"ori", 0x14>;
class PREF_MMR6_ENC : CACHE_PREF_FM_MMR6<0b011000, 0b0010>;
+class SB16_MMR6_ENC : LOAD_STORE_FM_MM16<0x22>;
class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>;
class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>;
class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>;
class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>;
+class SH16_MMR6_ENC : LOAD_STORE_FM_MM16<0x2a>;
class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>;
class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>;
class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>;
+class SW_MMR6_ENC : SW32_FM_MMR6<"sw", 0x3e>;
+class SWE_MMR6_ENC : POOL32C_SWE_FM_MMR6<"swe", 0x18, 0xa, 0x7>;
+class SW16_MMR6_ENC : LOAD_STORE_FM_MM16<0x3a>;
+class SWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0xa>;
+class SWSP_MMR6_ENC : LOAD_STORE_SP_FM_MM16<0x32>;
+class PREFE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b010>;
+class CACHEE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b011>;
+class WRPGPR_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x3c5>;
+class WSBH_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x1ec>;
+class LB_MMR6_ENC : LB32_FM_MMR6;
+class LBU_MMR6_ENC : LBU32_FM_MMR6;
+class LBE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b100>;
+class LBUE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b000>;
+class PAUSE_MMR6_ENC : POOL32A_PAUSE_FM_MMR6<"pause", 0b00101>;
+class RDHWR_MMR6_ENC : POOL32A_RDHWR_FM_MMR6;
+class WAIT_MMR6_ENC : WAIT_FM_MM, MMR6Arch<"wait">;
+class SSNOP_MMR6_ENC : BARRIER_FM_MM<0x1>, MMR6Arch<"ssnop">;
+class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6;
+class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">;
+class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>;
+class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">;
class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>;
class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>;
+class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>;
+class ABS_D_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.d", 1, 0b0001101>;
+class FLOOR_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.s", 0, 0b00001100>;
+class FLOOR_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.d", 1, 0b00001100>;
+class FLOOR_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.s", 0, 0b00101100>;
+class FLOOR_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.d", 1, 0b00101100>;
+class CEIL_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.s", 0, 0b01001100>;
+class CEIL_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.d", 1, 0b01001100>;
+class CEIL_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.s", 0, 0b01101100>;
+class CEIL_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.d", 1, 0b01101100>;
+class TRUNC_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.s", 0, 0b10001100>;
+class TRUNC_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.d", 1, 0b10001100>;
+class TRUNC_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.s", 0, 0b10101100>;
+class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>;
+class SQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.s", 0, 0b00101000>;
+class SQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.d", 1, 0b00101000>;
+class RSQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.s", 0, 0b00001000>;
+class RSQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.d", 1, 0b00001000>;
+class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>;
+class SBE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b100>;
+class SCE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b110>;
+class SH_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b001110>;
+class SHE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b101>;
+class LLE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b110>;
+class LWE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b111>;
+class LW_MMR6_ENC : LOAD_WORD_FM_MMR6;
+class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6;
+class RECIP_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.s", 0, 0b01001000>;
+class RECIP_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.d", 1, 0b01001000>;
+class RINT_S_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.s", 0>;
+class RINT_D_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.d", 1>;
+class ROUND_L_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.s", 0,
+ 0b11001100>;
+class ROUND_L_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.d", 1,
+ 0b11001100>;
+class ROUND_W_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.s", 0,
+ 0b11101100>;
+class ROUND_W_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.d", 1,
+ 0b11101100>;
+class SEL_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.s", 0, 0b010111000>;
+class SEL_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.d", 1, 0b010111000>;
+class SELEQZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.s", 0, 0b000111000>;
+class SELEQZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.d", 1, 0b000111000>;
+class SELENZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selenz.s", 0, 0b001111000>;
+class SELENZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selenz.d", 1, 0b001111000>;
+class CLASS_S_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.s", 0, 0b001100000>;
+class CLASS_D_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.d", 1, 0b001100000>;
+
+class ADDU16_MMR6_ENC : POOL16A_ADDU16_FM_MMR6;
+class AND16_MMR6_ENC : POOL16C_AND16_FM_MMR6;
+class ANDI16_MMR6_ENC : ANDI_FM_MM16<0b001011>, MicroMipsR6Inst16;
+class NOT16_MMR6_ENC : POOL16C_NOT16_FM_MMR6;
+class OR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1001>;
+class SLL16_MMR6_ENC : SHIFT_FM_MM16<0>, MicroMipsR6Inst16;
+class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>, MicroMipsR6Inst16;
+class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>;
+class LI16_MMR6_ENC : LI_FM_MM16;
+class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>;
+class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>;
+class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6;
+class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>;
class CMP_CBR_RT_Z_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd,
RegisterOperand GPROpnd>
@@ -108,6 +208,43 @@ class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm,
list<Register> Defs = [RA];
}
+/// Floating Point Instructions
+class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>;
+class FADD_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.d", 1, 0b00110000>;
+class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>;
+class FSUB_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.d", 1, 0b01110000>;
+class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>;
+class FMUL_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.d", 1, 0b10110000>;
+class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>;
+class FDIV_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.d", 1, 0b11110000>;
+class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>;
+class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>;
+class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>;
+class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>;
+class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>;
+class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>;
+class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>;
+class FNEG_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.d", 1, 0b0101101>;
+class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>;
+class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>;
+class MAXA_S_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>;
+class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>;
+class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>;
+class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>;
+class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>;
+class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>;
+
+class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>;
+class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>;
+class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>;
+class CVT_W_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.d", 1, 0b00100100>;
+class CVT_D_S_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.s", 0, 0b1001101>;
+class CVT_D_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.w", 1, 0b1001101>;
+class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>;
+class CVT_S_D_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.d", 0, 0b1101101>;
+class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>;
+class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>;
+
//===----------------------------------------------------------------------===//
//
// Instruction Descriptions
@@ -130,11 +267,34 @@ class BC_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd>
bit isBarrier = 1;
}
-class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26> {
+class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm> {
bit isCall = 1;
list<Register> Defs = [RA];
}
-class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26>;
+class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm>;
+
+class BC16_MMR6_DESC : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset),
+ !strconcat("bc16", "\t$offset"), [],
+ II_BC, FrmI>,
+ MMR6Arch<"bc16">, MicroMipsR6Inst16 {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 0;
+ let AdditionalPredicates = [RelocPIC];
+ let Defs = [AT];
+}
+
+class BEQZC_BNEZC_MM16R6_DESC_BASE<string instr_asm>
+ : CBranchZeroMM<instr_asm, brtarget7_mm, GPRMM16Opnd>, MMR6Arch<instr_asm> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let hasDelaySlot = 0;
+ let Defs = [AT];
+}
+class BEQZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"beqzc16">;
+class BNEZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"bnezc16">;
+
class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd>;
class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd>;
@@ -162,6 +322,35 @@ class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd>;
class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd>;
+class PREFE_CACHEE_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd> :
+ CACHE_HINT_MMR6_DESC<instr_asm, MemOpnd,
+ GPROpnd> {
+ string DecoderMethod = "DecodePrefeOpMM";
+}
+
+class PREFE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"prefe", mem_mm_9, GPR32Opnd>;
+class CACHEE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"cachee", mem_mm_9, GPR32Opnd>;
+
+class LB_LBU_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd> : MMR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins MemOpnd:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ string DecoderMethod = "DecodeLoadByte15";
+ bit mayLoad = 1;
+}
+class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd>;
+class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd>;
+
+class LBE_LBUE_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd>
+ : LB_LBU_MMR6_DESC_BASE<instr_asm, MemOpnd, GPROpnd> {
+ let DecoderMethod = "DecodeLoadByte9";
+}
+class LBE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbe", mem_mm_9, GPR32Opnd>;
+class LBUE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbue", mem_mm_9, GPR32Opnd>;
+
class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
: MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rt);
@@ -174,10 +363,22 @@ class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>;
class EHB_MMR6_DESC : Barrier<"ehb">;
class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>;
+class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd>;
class ERET_MMR6_DESC : ER_FT<"eret">;
+class DERET_MMR6_DESC : ER_FT<"deret">;
class ERETNC_MMR6_DESC : ER_FT<"eretnc">;
+class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
+ : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [(MipsJmpLink RO:$rs)], II_JALR, FrmR>,
+ MMR6Arch<opstr>, MicroMipsR6Inst16 {
+ let isCall = 1;
+ let hasDelaySlot = 0;
+ let Defs = [RA];
+}
+class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;
+
class JMP_MMR6_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
RegisterOperand GPROpnd>
: MMR6Arch<opstr> {
@@ -200,6 +401,27 @@ class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16,
list<Register> Defs = [AT];
}
+class JRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
+ : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
+ [], II_JR, FrmR>,
+ MMR6Arch<opstr>, MicroMipsR6Inst16 {
+ let hasDelaySlot = 0;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+class JRC16_MMR6_DESC : JRC16_MMR6_DESC_BASE<"jrc16", GPR32Opnd>;
+
+class JRCADDIUSP_MMR6_DESC
+ : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jrcaddiusp\t$imm",
+ [], II_JRADDIUSP, FrmR>,
+ MMR6Arch<"jrcaddiusp">, MicroMipsR6Inst16 {
+ let hasDelaySlot = 0;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
class ALIGN_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd> : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
@@ -241,7 +463,7 @@ class LSA_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
list<dag> Pattern = [];
}
-class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2>;
+class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>;
class PCREL_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd> : MMR6Arch<instr_asm> {
@@ -264,6 +486,18 @@ class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>;
class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>;
+class PAUSE_MMR6_DESC : Barrier<"pause">;
+class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins HWRegsOpnd:$rs, uimm3:$sel);
+ string AsmString = !strconcat("rdhwr", "\t$rt, $rs, $sel");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_RDHWR;
+ Format Form = FrmR;
+}
+
+class WAIT_MMR6_DESC : WaitMM<"wait">;
+class SSNOP_MMR6_DESC : Barrier<"ssnop">;
class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>;
class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>;
class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>;
@@ -277,13 +511,426 @@ class ORI_MMR6_DESC : ArithLogicI<"ori", simm16, GPR32Opnd>;
class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd>;
class XORI_MMR6_DESC : ArithLogicI<"xori", simm16, GPR32Opnd>;
+class SWE_MMR6_DESC_BASE<string opstr, DAGOperand RO, DAGOperand MO,
+ SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary,
+ ComplexPattern Addr = addr> :
+ InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> {
+ let DecoderMethod = "DecodeMem";
+ let mayStore = 1;
+}
+class SW_MMR6_DESC : Store<"sw", GPR32Opnd>;
+class SWE_MMR6_DESC : SWE_MMR6_DESC_BASE<"swe", GPR32Opnd, mem_simm9>;
+
+class WRPGPR_WSBH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO>
+ : MMR6Arch<instr_asm> {
+ dag InOperandList = (ins RO:$rs);
+ dag OutOperandList = (outs RO:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
+ list<dag> Pattern = [];
+ Format f = FrmR;
+ string BaseOpcode = instr_asm;
+ bit hasSideEffects = 0;
+}
+class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd>;
+class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd>;
+
+/// Floating Point Instructions
+class FARITH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RC,
+ InstrItinClass Itin, bit isComm,
+ SDPatternOperator OpNode = null_frag> : HARDFLOAT {
+ dag OutOperandList = (outs RC:$fd);
+ dag InOperandList = (ins RC:$ft, RC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [(set RC:$fd, (OpNode RC:$fs, RC:$ft))];
+ InstrItinClass Itinerary = Itin;
+ bit isCommutable = isComm;
+}
+class FADD_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>;
+class FADD_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"add.d", AFGR64Opnd, II_ADD_D, 1, fadd>;
+class FSUB_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>;
+class FSUB_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"sub.d", AFGR64Opnd, II_SUB_D, 0, fsub>;
+class FMUL_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>;
+class FMUL_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"mul.d", AFGR64Opnd, II_MUL_D, 1, fmul>;
+class FDIV_S_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>;
+class FDIV_D_MMR6_DESC
+ : FARITH_MMR6_DESC_BASE<"div.d", AFGR64Opnd, II_DIV_D, 0, fdiv>;
+class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>, HARDFLOAT;
+class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>, HARDFLOAT;
+class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>, HARDFLOAT;
+class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd>, HARDFLOAT;
+
+class FMOV_FNEG_MMR6_DESC_BASE<string instr_asm, RegisterOperand DstRC,
+ RegisterOperand SrcRC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag>
+ : HARDFLOAT, NeverHasSideEffects {
+ dag OutOperandList = (outs DstRC:$ft);
+ dag InOperandList = (ins SrcRC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$ft, $fs");
+ list<dag> Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))];
+ InstrItinClass Itinerary = Itin;
+ Format Form = FrmFR;
+}
+class FMOV_S_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>;
+class FMOV_D_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>;
+class FNEG_S_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>;
+class FNEG_D_MMR6_DESC
+ : FMOV_FNEG_MMR6_DESC_BASE<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>;
+
+class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>, HARDFLOAT;
+class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>, HARDFLOAT;
+class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>, HARDFLOAT;
+class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>, HARDFLOAT;
+
+class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>, HARDFLOAT;
+class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>, HARDFLOAT;
+class MINA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>, HARDFLOAT;
+class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>, HARDFLOAT;
+
+class CVT_MMR6_DESC_BASE<
+ string instr_asm, RegisterOperand DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin, SDPatternOperator OpNode = null_frag>
+ : HARDFLOAT, NeverHasSideEffects {
+ dag OutOperandList = (outs DstRC:$ft);
+ dag InOperandList = (ins SrcRC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$ft, $fs");
+ list<dag> Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))];
+ InstrItinClass Itinerary = Itin;
+ Format Form = FrmFR;
+}
+
+class CVT_L_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.s", FGR64Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_L_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.d", FGR64Opnd, FGR64Opnd,
+ II_CVT>;
+class CVT_W_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.s", FGR32Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_W_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.d", FGR32Opnd, AFGR64Opnd,
+ II_CVT>;
+class CVT_D_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.s", FGR32Opnd, AFGR64Opnd,
+ II_CVT>;
+class CVT_D_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.w", FGR32Opnd, AFGR64Opnd,
+ II_CVT>;
+class CVT_D_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.l", FGR64Opnd, FGR64Opnd,
+ II_CVT>, FGR_64;
+class CVT_S_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.d", AFGR64Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_S_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.w", FGR32Opnd, FGR32Opnd,
+ II_CVT>;
+class CVT_S_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.l", FGR64Opnd, FGR32Opnd,
+ II_CVT>, FGR_64;
+
+multiclass CMP_CC_MMR6<bits<6> format, string Typestr,
+ RegisterOperand FGROpnd> {
+ def CMP_AF_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.af.", Typestr), format, FIELD_CMP_COND_AF>,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_UN_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.un.", Typestr), format, FIELD_CMP_COND_UN>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_EQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.eq.", Typestr), format, FIELD_CMP_COND_EQ>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_UEQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.ueq.", Typestr), format, FIELD_CMP_COND_UEQ>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_LT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.lt.", Typestr), format, FIELD_CMP_COND_LT>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_ULT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.ult.", Typestr), format, FIELD_CMP_COND_ULT>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_LE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.le.", Typestr), format, FIELD_CMP_COND_LE>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_ULE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.ule.", Typestr), format, FIELD_CMP_COND_ULE>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SAF_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.saf.", Typestr), format, FIELD_CMP_COND_SAF>,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SUN_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sun.", Typestr), format, FIELD_CMP_COND_SUN>,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SEQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.seq.", Typestr), format, FIELD_CMP_COND_SEQ>,
+ CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SUEQ_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sueq.", Typestr), format, FIELD_CMP_COND_SUEQ>,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SLT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.slt.", Typestr), format, FIELD_CMP_COND_SLT>,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SULT_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sult.", Typestr), format, FIELD_CMP_COND_SULT>,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SLE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sle.", Typestr), format, FIELD_CMP_COND_SLE>,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+ def CMP_SULE_#NAME : POOL32F_CMP_FM<
+ !strconcat("cmp.sule.", Typestr), format, FIELD_CMP_COND_SULE>,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel,
+ ISA_MICROMIPS32R6;
+}
+
+class ABSS_FT_MMR6_DESC_BASE<string instr_asm, RegisterOperand DstRC,
+ RegisterOperand SrcRC, InstrItinClass Itin,
+ SDPatternOperator OpNode = null_frag>
+ : HARDFLOAT, NeverHasSideEffects {
+ dag OutOperandList = (outs DstRC:$ft);
+ dag InOperandList = (ins SrcRC:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$ft, $fs");
+ list<dag> Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))];
+ InstrItinClass Itinerary = Itin;
+ Format Form = FrmFR;
+ list<Predicate> EncodingPredicates = [HasStdEnc];
+}
+
+class ABS_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.s", FGR32Opnd, FGR32Opnd,
+ II_ABS, fabs>;
+class ABS_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.d", AFGR64Opnd, AFGR64Opnd,
+ II_ABS, fabs>;
+class FLOOR_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.s", FGR64Opnd,
+ FGR32Opnd, II_FLOOR>;
+class FLOOR_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.d", FGR64Opnd,
+ FGR64Opnd, II_FLOOR>;
+class FLOOR_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.s", FGR32Opnd,
+ FGR32Opnd, II_FLOOR>;
+class FLOOR_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.d", FGR32Opnd,
+ AFGR64Opnd, II_FLOOR>;
+class CEIL_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.s", FGR64Opnd,
+ FGR32Opnd, II_CEIL>;
+class CEIL_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.d", FGR64Opnd,
+ FGR64Opnd, II_CEIL>;
+class CEIL_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.s", FGR32Opnd,
+ FGR32Opnd, II_CEIL>;
+class CEIL_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.d", FGR32Opnd,
+ AFGR64Opnd, II_CEIL>;
+class TRUNC_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.s", FGR64Opnd,
+ FGR32Opnd, II_TRUNC>;
+class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd,
+ FGR64Opnd, II_TRUNC>;
+class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd,
+ FGR32Opnd, II_TRUNC>;
+class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
+ AFGR64Opnd, II_TRUNC>;
+class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
+ II_SQRT_S, fsqrt>;
+class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
+ II_SQRT_D, fsqrt>;
+class RSQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.s", FGR32Opnd,
+ FGR32Opnd, II_TRUNC>;
+class RSQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.d", FGR32Opnd,
+ AFGR64Opnd, II_TRUNC>;
+class RECIP_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.s", FGR32Opnd,
+ FGR32Opnd, II_ROUND>;
+class RECIP_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.d", FGR32Opnd, FGR32Opnd,
+ II_ROUND>;
+class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd,
+ FGR32Opnd, II_ROUND>;
+class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd,
+ FGR64Opnd, II_ROUND>;
+class ROUND_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.s", FGR32Opnd,
+ FGR32Opnd, II_ROUND>;
+class ROUND_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.d", FGR64Opnd,
+ FGR64Opnd, II_ROUND>;
+
+class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>;
+class SEL_D_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> {
+ // We must insert a SUBREG_TO_REG around $fd_in
+ bit usesCustomInserter = 1;
+}
+
+class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>;
+class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>;
+class SELENZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>;
+class SELENZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>;
+class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>;
+class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
+class CLASS_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
+class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
+
+class STORE_MMR6_DESC_BASE<string opstr, DAGOperand RO>
+ : Store<opstr, RO>, MMR6Arch<opstr> {
+ let DecoderMethod = "DecodeMemMMImm16";
+}
+class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd>;
+
+class STORE_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins RO:$rt, mem_mm_9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ string DecoderMethod = "DecodeStoreEvaOpMM";
+ bit mayStore = 1;
+}
+class SBE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sbe", GPR32Opnd>;
+class SCE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sce", GPR32Opnd>;
+class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd>;
+class SHE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"she", GPR32Opnd>;
+class LOAD_WORD_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO> :
+ MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs RO:$rt);
+ dag InOperandList = (ins mem_mm_12:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ string DecoderMethod = "DecodeMemMMImm9";
+ bit mayLoad = 1;
+}
+class LLE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lle", GPR32Opnd>;
+class LWE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lwe", GPR32Opnd>;
+class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
+ MMR6Arch<"addu16">;
+class AND16_MMR6_DESC : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>,
+ MMR6Arch<"and16">;
+class ANDI16_MMR6_DESC : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>,
+ MMR6Arch<"andi16">;
+class NOT16_MMR6_DESC : NotMM16<"not16", GPRMM16Opnd>, MMR6Arch<"not16">;
+class OR16_MMR6_DESC : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>,
+ MMR6Arch<"or16">;
+class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
+ MMR6Arch<"sll16">;
+class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
+ MMR6Arch<"srl16">;
+class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16">, MMR6Arch<"srl16">,
+ MicroMipsR6Inst16;
+class LI16_MMR6_DESC : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>,
+ MMR6Arch<"srl16">, MicroMipsR6Inst16, IsAsCheapAsAMove;
+class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"srl16">,
+ MicroMipsR6Inst16;
+class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16">, MMR6Arch<"sdbbp16">,
+ MicroMipsR6Inst16;
+class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
+ MMR6Arch<"sdbbp16">, MicroMipsR6Inst16;
+class XOR16_MMR6_DESC : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
+ MMR6Arch<"sdbbp16">, MicroMipsR6Inst16;
+
+class LW_MMR6_DESC : MMR6Arch<"lw">, MipsR6Inst {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins mem:$addr);
+ string AsmString = "lw\t$rt, $addr";
+ let DecoderMethod = "DecodeMemMMImm16";
+ let canFoldAsLoad = 1;
+ let mayLoad = 1;
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (load addrDefault:$addr))];
+ InstrItinClass Itinerary = II_LW;
+}
+
+class LUI_MMR6_DESC : IsAsCheapAsAMove, MMR6Arch<"lui">, MipsR6Inst{
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins uimm16:$imm16);
+ string AsmString = "lui\t$rt, $imm16";
+ list<dag> Pattern = [];
+ bit hasSideEffects = 0;
+ bit isReMaterializable = 1;
+ InstrItinClass Itinerary = II_LUI;
+ Format Form = FrmI;
+}
+
+class SYNC_MMR6_DESC : MMR6Arch<"sync">, MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins i32imm:$stype);
+ string AsmString = !strconcat("sync", "\t$stype");
+ list<dag> Pattern = [(MipsSync imm:$stype)];
+ InstrItinClass Itinerary = NoItinerary;
+ bit HasSideEffects = 1;
+}
+
+class SYNCI_MMR6_DESC : SYNCI_FT<"synci"> {
+ let DecoderMethod = "DecodeSynciR6";
+}
+
+class RDPGPR_MMR6_DESC : MMR6Arch<"rdpgpr">, MipsR6Inst {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins GPR32Opnd:$rd);
+ string AsmString = !strconcat("rdpgpr", "\t$rt, $rd");
+}
+
+class SDBBP_MMR6_DESC : MipsR6Inst {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm20:$code_);
+ string AsmString = !strconcat("sdbbp", "\t$code_");
+ list<dag> Pattern = [];
+}
+
+class LWM16_MMR6_DESC
+ : MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
+ !strconcat("lwm16", "\t$rt, $addr"), [],
+ NoItinerary, FrmI>,
+ MMR6Arch<"lwm16">, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
+ let mayLoad = 1;
+ InstrItinClass Itin = NoItinerary;
+ ComplexPattern Addr = addr;
+}
+
+class SWM16_MMR6_DESC
+ : MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
+ !strconcat("swm16", "\t$rt, $addr"), [],
+ NoItinerary, FrmI>,
+ MMR6Arch<"swm16">, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
+ let mayStore = 1;
+ InstrItinClass Itin = NoItinerary;
+ ComplexPattern Addr = addr;
+}
+
+class SB16_MMR6_DESC_BASE<string opstr, DAGOperand RTOpnd, DAGOperand RO,
+ SDPatternOperator OpNode, InstrItinClass Itin,
+ Operand MemOpnd>
+ : MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>,
+ MMR6Arch<opstr>, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMImm4";
+ let mayStore = 1;
+}
+class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd,
+ truncstorei8, II_SB, mem_mm_4>;
+class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd,
+ truncstorei16, II_SH, mem_mm_4_lsl1>;
+class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd,
+ store, II_SW, mem_mm_4_lsl2>;
+
+class SWSP_MMR6_DESC
+ : MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset),
+ !strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>,
+ MMR6Arch<"sw">, MicroMipsR6Inst16 {
+ let DecoderMethod = "DecodeMemMMSPImm5Lsl2";
+ let mayStore = 1;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
//
//===----------------------------------------------------------------------===//
-let DecoderNamespace = "MicroMips32r6" in {
+let DecoderNamespace = "MicroMipsR6" in {
def ADD_MMR6 : StdMMR6Rel, ADD_MMR6_DESC, ADD_MMR6_ENC, ISA_MICROMIPS32R6;
def ADDIU_MMR6 : StdMMR6Rel, ADDIU_MMR6_DESC, ADDIU_MMR6_ENC, ISA_MICROMIPS32R6;
def ADDU_MMR6 : StdMMR6Rel, ADDU_MMR6_DESC, ADDU_MMR6_ENC, ISA_MICROMIPS32R6;
@@ -298,6 +945,11 @@ def ALIGN_MMR6 : R6MMR6Rel, ALIGN_MMR6_ENC, ALIGN_MMR6_DESC, ISA_MICROMIPS32R6;
def AUI_MMR6 : R6MMR6Rel, AUI_MMR6_ENC, AUI_MMR6_DESC, ISA_MICROMIPS32R6;
def BALC_MMR6 : R6MMR6Rel, BALC_MMR6_ENC, BALC_MMR6_DESC, ISA_MICROMIPS32R6;
def BC_MMR6 : R6MMR6Rel, BC_MMR6_ENC, BC_MMR6_DESC, ISA_MICROMIPS32R6;
+def BC16_MMR6 : StdMMR6Rel, BC16_MMR6_DESC, BC16_MMR6_ENC, ISA_MICROMIPS32R6;
+def BEQZC16_MMR6 : StdMMR6Rel, BEQZC16_MMR6_DESC, BEQZC16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def BNEZC16_MMR6 : StdMMR6Rel, BNEZC16_MMR6_DESC, BNEZC16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def BITSWAP_MMR6 : R6MMR6Rel, BITSWAP_MMR6_ENC, BITSWAP_MMR6_DESC,
ISA_MICROMIPS32R6;
def BEQZALC_MMR6 : R6MMR6Rel, BEQZALC_MMR6_ENC, BEQZALC_MMR6_DESC,
@@ -320,13 +972,21 @@ def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6;
def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6;
def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6;
def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6;
-def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6;
+def DI_MMR6 : StdMMR6Rel, DI_MMR6_DESC, DI_MMR6_ENC, ISA_MICROMIPS32R6;
+def ERET_MMR6 : StdMMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6;
+def DERET_MMR6 : StdMMR6Rel, DERET_MMR6_DESC, DERET_MMR6_ENC, ISA_MICROMIPS32R6;
def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC,
ISA_MICROMIPS32R6;
+def JALRC16_MMR6 : R6MMR6Rel, JALRC16_MMR6_DESC, JALRC16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6;
def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6;
+def JRC16_MMR6 : R6MMR6Rel, JRC16_MMR6_DESC, JRC16_MMR6_ENC, ISA_MICROMIPS32R6;
+def JRCADDIUSP_MMR6 : R6MMR6Rel, JRCADDIUSP_MMR6_DESC, JRCADDIUSP_MMR6_ENC,
+ ISA_MICROMIPS32R6;
def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6;
def LWPC_MMR6 : R6MMR6Rel, LWPC_MMR6_ENC, LWPC_MMR6_DESC, ISA_MICROMIPS32R6;
+def LWM16_MMR6 : StdMMR6Rel, LWM16_MMR6_DESC, LWM16_MMR6_ENC, ISA_MICROMIPS32R6;
def MOD_MMR6 : R6MMR6Rel, MOD_MMR6_DESC, MOD_MMR6_ENC, ISA_MICROMIPS32R6;
def MODU_MMR6 : R6MMR6Rel, MODU_MMR6_DESC, MODU_MMR6_ENC, ISA_MICROMIPS32R6;
def MUL_MMR6 : R6MMR6Rel, MUL_MMR6_DESC, MUL_MMR6_ENC, ISA_MICROMIPS32R6;
@@ -337,17 +997,211 @@ def NOR_MMR6 : StdMMR6Rel, NOR_MMR6_DESC, NOR_MMR6_ENC, ISA_MICROMIPS32R6;
def OR_MMR6 : StdMMR6Rel, OR_MMR6_DESC, OR_MMR6_ENC, ISA_MICROMIPS32R6;
def ORI_MMR6 : StdMMR6Rel, ORI_MMR6_DESC, ORI_MMR6_ENC, ISA_MICROMIPS32R6;
def PREF_MMR6 : R6MMR6Rel, PREF_MMR6_ENC, PREF_MMR6_DESC, ISA_MICROMIPS32R6;
+def SB16_MMR6 : StdMMR6Rel, SB16_MMR6_DESC, SB16_MMR6_ENC, ISA_MICROMIPS32R6;
def SEB_MMR6 : StdMMR6Rel, SEB_MMR6_DESC, SEB_MMR6_ENC, ISA_MICROMIPS32R6;
def SEH_MMR6 : StdMMR6Rel, SEH_MMR6_DESC, SEH_MMR6_ENC, ISA_MICROMIPS32R6;
def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC,
ISA_MICROMIPS32R6;
def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC,
ISA_MICROMIPS32R6;
+def SH16_MMR6 : StdMMR6Rel, SH16_MMR6_DESC, SH16_MMR6_ENC, ISA_MICROMIPS32R6;
def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6;
def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6;
def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6;
+def SW16_MMR6 : StdMMR6Rel, SW16_MMR6_DESC, SW16_MMR6_ENC, ISA_MICROMIPS32R6;
+def SWM16_MMR6 : StdMMR6Rel, SWM16_MMR6_DESC, SWM16_MMR6_ENC, ISA_MICROMIPS32R6;
+def SWSP_MMR6 : StdMMR6Rel, SWSP_MMR6_DESC, SWSP_MMR6_ENC, ISA_MICROMIPS32R6;
+def PREFE_MMR6 : StdMMR6Rel, PREFE_MMR6_ENC, PREFE_MMR6_DESC, ISA_MICROMIPS32R6;
+def CACHEE_MMR6 : StdMMR6Rel, CACHEE_MMR6_ENC, CACHEE_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def WRPGPR_MMR6 : StdMMR6Rel, WRPGPR_MMR6_ENC, WRPGPR_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def WSBH_MMR6 : StdMMR6Rel, WSBH_MMR6_ENC, WSBH_MMR6_DESC, ISA_MICROMIPS32R6;
+def LB_MMR6 : R6MMR6Rel, LB_MMR6_ENC, LB_MMR6_DESC, ISA_MICROMIPS32R6;
+def LBU_MMR6 : R6MMR6Rel, LBU_MMR6_ENC, LBU_MMR6_DESC, ISA_MICROMIPS32R6;
+def LBE_MMR6 : R6MMR6Rel, LBE_MMR6_ENC, LBE_MMR6_DESC, ISA_MICROMIPS32R6;
+def LBUE_MMR6 : R6MMR6Rel, LBUE_MMR6_ENC, LBUE_MMR6_DESC, ISA_MICROMIPS32R6;
+def PAUSE_MMR6 : StdMMR6Rel, PAUSE_MMR6_DESC, PAUSE_MMR6_ENC, ISA_MICROMIPS32R6;
+def RDHWR_MMR6 : R6MMR6Rel, RDHWR_MMR6_DESC, RDHWR_MMR6_ENC, ISA_MICROMIPS32R6;
+def WAIT_MMR6 : StdMMR6Rel, WAIT_MMR6_DESC, WAIT_MMR6_ENC, ISA_MICROMIPS32R6;
+def SSNOP_MMR6 : StdMMR6Rel, SSNOP_MMR6_DESC, SSNOP_MMR6_ENC, ISA_MICROMIPS32R6;
+def SYNC_MMR6 : StdMMR6Rel, SYNC_MMR6_DESC, SYNC_MMR6_ENC, ISA_MICROMIPS32R6;
+def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6;
+def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6;
def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6;
def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
+let DecoderMethod = "DecodeMemMMImm16" in {
+ def SW_MMR6 : StdMMR6Rel, SW_MMR6_DESC, SW_MMR6_ENC, ISA_MICROMIPS32R6;
+}
+let DecoderMethod = "DecodeMemMMImm9" in {
+ def SWE_MMR6 : StdMMR6Rel, SWE_MMR6_DESC, SWE_MMR6_ENC, ISA_MICROMIPS32R6;
+}
+/// Floating Point Instructions
+def FADD_S_MMR6 : StdMMR6Rel, FADD_S_MMR6_ENC, FADD_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FADD_D_MMR6 : StdMMR6Rel, FADD_D_MMR6_ENC, FADD_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FSUB_S_MMR6 : StdMMR6Rel, FSUB_S_MMR6_ENC, FSUB_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FSUB_D_MMR6 : StdMMR6Rel, FSUB_D_MMR6_ENC, FSUB_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMUL_S_MMR6 : StdMMR6Rel, FMUL_S_MMR6_ENC, FMUL_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMUL_D_MMR6 : StdMMR6Rel, FMUL_D_MMR6_ENC, FMUL_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FDIV_S_MMR6 : StdMMR6Rel, FDIV_S_MMR6_ENC, FDIV_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FDIV_D_MMR6 : StdMMR6Rel, FDIV_D_MMR6_ENC, FDIV_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MADDF_S_MMR6 : R6MMR6Rel, MADDF_S_MMR6_ENC, MADDF_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MADDF_D_MMR6 : R6MMR6Rel, MADDF_D_MMR6_ENC, MADDF_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MSUBF_S_MMR6 : R6MMR6Rel, MSUBF_S_MMR6_ENC, MSUBF_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FMOV_D_MMR6 : StdMMR6Rel, FMOV_D_MMR6_ENC, FMOV_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FNEG_D_MMR6 : StdMMR6Rel, FNEG_D_MMR6_ENC, FNEG_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def MAX_D_MMR6 : R6MMR6Rel, MAX_D_MMR6_ENC, MAX_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def MIN_S_MMR6 : R6MMR6Rel, MIN_S_MMR6_ENC, MIN_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def MIN_D_MMR6 : R6MMR6Rel, MIN_D_MMR6_ENC, MIN_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def MAXA_S_MMR6 : R6MMR6Rel, MAXA_S_MMR6_ENC, MAXA_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MAXA_D_MMR6 : R6MMR6Rel, MAXA_D_MMR6_ENC, MAXA_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MINA_S_MMR6 : R6MMR6Rel, MINA_S_MMR6_ENC, MINA_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def MINA_D_MMR6 : R6MMR6Rel, MINA_D_MMR6_ENC, MINA_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_L_S_MMR6 : StdMMR6Rel, CVT_L_S_MMR6_ENC, CVT_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_L_D_MMR6 : StdMMR6Rel, CVT_L_D_MMR6_ENC, CVT_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_W_S_MMR6 : StdMMR6Rel, CVT_W_S_MMR6_ENC, CVT_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_W_D_MMR6 : StdMMR6Rel, CVT_W_D_MMR6_ENC, CVT_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_D_S_MMR6 : StdMMR6Rel, CVT_D_S_MMR6_ENC, CVT_D_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_D_W_MMR6 : StdMMR6Rel, CVT_D_W_MMR6_ENC, CVT_D_W_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_D_L_MMR6 : StdMMR6Rel, CVT_D_L_MMR6_ENC, CVT_D_L_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_S_D_MMR6 : StdMMR6Rel, CVT_S_D_MMR6_ENC, CVT_S_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_S_W_MMR6 : StdMMR6Rel, CVT_S_W_MMR6_ENC, CVT_S_W_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CVT_S_L_MMR6 : StdMMR6Rel, CVT_S_L_MMR6_ENC, CVT_S_L_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd>;
+defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd>;
+def ABS_S_MMR6 : StdMMR6Rel, ABS_S_MMR6_ENC, ABS_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def ABS_D_MMR6 : StdMMR6Rel, ABS_D_MMR6_ENC, ABS_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def FLOOR_L_S_MMR6 : StdMMR6Rel, FLOOR_L_S_MMR6_ENC, FLOOR_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FLOOR_L_D_MMR6 : StdMMR6Rel, FLOOR_L_D_MMR6_ENC, FLOOR_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FLOOR_W_S_MMR6 : StdMMR6Rel, FLOOR_W_S_MMR6_ENC, FLOOR_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def FLOOR_W_D_MMR6 : StdMMR6Rel, FLOOR_W_D_MMR6_ENC, FLOOR_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_L_S_MMR6 : StdMMR6Rel, CEIL_L_S_MMR6_ENC, CEIL_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_L_D_MMR6 : StdMMR6Rel, CEIL_L_D_MMR6_ENC, CEIL_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_W_S_MMR6 : StdMMR6Rel, CEIL_W_S_MMR6_ENC, CEIL_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CEIL_W_D_MMR6 : StdMMR6Rel, CEIL_W_D_MMR6_ENC, CEIL_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_L_S_MMR6 : StdMMR6Rel, TRUNC_L_S_MMR6_ENC, TRUNC_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_L_D_MMR6 : StdMMR6Rel, TRUNC_L_D_MMR6_ENC, TRUNC_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_W_S_MMR6 : StdMMR6Rel, TRUNC_W_S_MMR6_ENC, TRUNC_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def TRUNC_W_D_MMR6 : StdMMR6Rel, TRUNC_W_D_MMR6_ENC, TRUNC_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SQRT_S_MMR6 : StdMMR6Rel, SQRT_S_MMR6_ENC, SQRT_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SQRT_D_MMR6 : StdMMR6Rel, SQRT_D_MMR6_ENC, SQRT_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RSQRT_S_MMR6 : StdMMR6Rel, RSQRT_S_MMR6_ENC, RSQRT_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RSQRT_D_MMR6 : StdMMR6Rel, RSQRT_D_MMR6_ENC, RSQRT_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6;
+def SBE_MMR6 : StdMMR6Rel, SBE_MMR6_DESC, SBE_MMR6_ENC, ISA_MICROMIPS32R6;
+def SCE_MMR6 : StdMMR6Rel, SCE_MMR6_DESC, SCE_MMR6_ENC, ISA_MICROMIPS32R6;
+def SH_MMR6 : StdMMR6Rel, SH_MMR6_DESC, SH_MMR6_ENC, ISA_MICROMIPS32R6;
+def SHE_MMR6 : StdMMR6Rel, SHE_MMR6_DESC, SHE_MMR6_ENC, ISA_MICROMIPS32R6;
+def LLE_MMR6 : StdMMR6Rel, LLE_MMR6_DESC, LLE_MMR6_ENC, ISA_MICROMIPS32R6;
+def LWE_MMR6 : StdMMR6Rel, LWE_MMR6_DESC, LWE_MMR6_ENC, ISA_MICROMIPS32R6;
+def LW_MMR6 : StdMMR6Rel, LW_MMR6_DESC, LW_MMR6_ENC, ISA_MICROMIPS32R6;
+def LUI_MMR6 : R6MMR6Rel, LUI_MMR6_DESC, LUI_MMR6_ENC, ISA_MICROMIPS32R6;
+def ADDU16_MMR6 : StdMMR6Rel, ADDU16_MMR6_DESC, ADDU16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def AND16_MMR6 : StdMMR6Rel, AND16_MMR6_DESC, AND16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def ANDI16_MMR6 : StdMMR6Rel, ANDI16_MMR6_DESC, ANDI16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def NOT16_MMR6 : StdMMR6Rel, NOT16_MMR6_DESC, NOT16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def OR16_MMR6 : StdMMR6Rel, OR16_MMR6_DESC, OR16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SLL16_MMR6 : StdMMR6Rel, SLL16_MMR6_DESC, SLL16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SRL16_MMR6 : StdMMR6Rel, SRL16_MMR6_DESC, SRL16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def BREAK16_MMR6 : StdMMR6Rel, BREAK16_MMR6_DESC, BREAK16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def XOR16_MMR6 : StdMMR6Rel, XOR16_MMR6_DESC, XOR16_MMR6_ENC,
+ ISA_MICROMIPS32R6;
+def RECIP_S_MMR6 : StdMMR6Rel, RECIP_S_MMR6_ENC, RECIP_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RECIP_D_MMR6 : StdMMR6Rel, RECIP_D_MMR6_ENC, RECIP_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ROUND_L_S_MMR6 : StdMMR6Rel, ROUND_L_S_MMR6_ENC, ROUND_L_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ROUND_L_D_MMR6 : StdMMR6Rel, ROUND_L_D_MMR6_ENC, ROUND_L_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ROUND_W_S_MMR6 : StdMMR6Rel, ROUND_W_S_MMR6_ENC, ROUND_W_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def ROUND_W_D_MMR6 : StdMMR6Rel, ROUND_W_D_MMR6_ENC, ROUND_W_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SEL_S_MMR6 : StdMMR6Rel, SEL_S_MMR6_ENC, SEL_S_MMR6_DESC, ISA_MICROMIPS32R6;
+def SEL_D_MMR6 : StdMMR6Rel, SEL_D_MMR6_ENC, SEL_D_MMR6_DESC, ISA_MICROMIPS32R6;
+def SELEQZ_S_MMR6 : StdMMR6Rel, SELEQZ_S_MMR6_ENC, SELEQZ_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SELEQZ_D_MMR6 : StdMMR6Rel, SELEQZ_D_MMR6_ENC, SELEQZ_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SELENZ_S_MMR6 : StdMMR6Rel, SELENZ_S_MMR6_ENC, SELENZ_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def SELENZ_D_MMR6 : StdMMR6Rel, SELENZ_D_MMR6_ENC, SELENZ_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CLASS_S_MMR6 : StdMMR6Rel, CLASS_S_MMR6_ENC, CLASS_S_MMR6_DESC,
+ ISA_MICROMIPS32R6;
+def CLASS_D_MMR6 : StdMMR6Rel, CLASS_D_MMR6_ENC, CLASS_D_MMR6_DESC,
+ ISA_MICROMIPS32R6;
}
//===----------------------------------------------------------------------===//
@@ -357,4 +1211,23 @@ def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6;
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"di", (DI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6;
def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6;
+def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
+ !strconcat("b", "\t$offset")> {
+ string DecoderNamespace = "MicroMipsR6";
+}
+def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6;
+def : MipsInstAlias<"rdhwr $rt, $rs",
+ (RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>,
+ ISA_MICROMIPS32R6;
+
+//===----------------------------------------------------------------------===//
+//
+// MicroMips arbitrary patterns that map to one or more instructions
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsPat<(store GPRMM16:$src, addrimm4lsl2:$addr),
+ (SW16_MMR6 GPRMM16:$src, addrimm4lsl2:$addr)>, ISA_MICROMIPS32R6;
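+// A sketch of what this pattern selects, based on the addrimm4lsl2 operand
+// name (an assumption, not spelled out here): word stores of a GPRMM16
+// register whose offset is a multiple of 4 in the range 0-60 are shrunk to
+// the 16-bit SW16 encoding; anything else keeps the 32-bit SW form.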
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td
new file mode 100644
index 0000000..da305a2
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td
@@ -0,0 +1,86 @@
+//=- MicroMips64r6InstrFormats.td - Instruction Formats -*- tablegen -*- -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes microMIPS64r6 instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_FM_MMR6 {
+ bits<5> rt;
+ bits<5> rs;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b111100;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm;
+}
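+// Worked encoding example for the field layout above (illustrative; register
+// numbers follow the usual $0-$31 convention): with rt = $3, rs = $4 and
+// imm = 0x1234,
+//   Inst = 0b111100 << 26 | 3 << 21 | 4 << 16 | 0x1234 = 0xf0641234.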
+
+class POOL32I_ADD_IMM_FM_MMR6<bits<5> funct> {
+ bits<5> rs;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010000;
+ let Inst{25-21} = funct;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = imm;
+}
+
+class POOL32S_EXTBITS_FM_MMR6<bits<6> funct> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> size;
+ bits<5> pos;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = size;
+ let Inst{10-6} = pos;
+ let Inst{5-0} = funct;
+}
+
+class POOL32S_DALIGN_FM_MMR6 {
+ bits<5> rs;
+ bits<5> rt;
+ bits<5> rd;
+ bits<3> bp;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-8} = bp;
+ let Inst{7-6} = 0b00;
+ let Inst{5-0} = 0b011100;
+}
+
+class POOL32A_DIVMOD_FM_MMR6<string instr_asm, bits<9> funct>
+ : MMR6Arch<instr_asm> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rd;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rt;
+ let Inst{10-9} = 0b00;
+ let Inst{8-0} = funct;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
new file mode 100644
index 0000000..ec1aef8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -0,0 +1,119 @@
+//=- MicroMips64r6InstrInfo.td - Instruction Information -*- tablegen -*- -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes MicroMips64r6 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Encodings
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_MMR6_ENC : DAUI_FM_MMR6;
+class DAHI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10001>;
+class DATI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10000>;
+class DEXT_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b101100>;
+class DEXTM_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b100100>;
+class DEXTU_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b010100>;
+class DALIGN_MMR6_ENC : POOL32S_DALIGN_FM_MMR6;
+class DDIV_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddiv", 0b100011000>;
+class DMOD_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmod", 0b101011000>;
+class DDIVU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddivu", 0b110011000>;
+class DMODU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmodu", 0b111011000>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Descriptions
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins GPROpnd:$rs, simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm");
+ list<dag> Pattern = [];
+}
+class DAUI_MMR6_DESC : DAUI_MMR6_DESC_BASE<"daui", GPR64Opnd>;
+
+class DAHI_DATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs GPROpnd:$rs);
+ dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+ string Constraints = "$rs = $rt";
+}
+class DAHI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dahi", GPR64Opnd>;
+class DATI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dati", GPR64Opnd>;
+
+class EXTBITS_DESC_BASE<string instr_asm, RegisterOperand RO, Operand PosOpnd,
+ Operand SizeOpnd, SDPatternOperator Op = null_frag>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs RO:$rt);
+ dag InOperandList = (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $pos, $size");
+ list<dag> Pattern = [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))];
+ InstrItinClass Itinerary = II_EXT;
+ Format Form = FrmR;
+ string BaseOpcode = instr_asm;
+}
+// TODO: Add 'pos + size' constraint check to dext* instructions
+// DEXT: 0 < pos + size <= 63
+// DEXTM, DEXTU: 32 < pos + size <= 64
+class DEXT_MMR6_DESC : EXTBITS_DESC_BASE<"dext", GPR64Opnd, uimm5,
+ uimm5_plus1, MipsExt>;
+class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm5,
+ uimm5_plus33, MipsExt>;
+class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32,
+ uimm5_plus1, MipsExt>;
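+// Illustrative examples of the ranges implied by the operand classes and the
+// TODO constraints above: "dext $2, $3, 10, 20" (pos + size = 30) fits plain
+// DEXT, "dextm $2, $3, 4, 40" (size > 32, pos + size = 44) needs DEXTM, and
+// "dextu $2, $3, 40, 10" (pos >= 32, pos + size = 50) needs DEXTU.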
+
+class DALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> : MMR6Arch<instr_asm>, MipsR6Inst {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp");
+ list<dag> Pattern = [];
+}
+
+class DALIGN_MMR6_DESC : DALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>;
+
+class DDIV_MM64R6_DESC : ArithLogicR<"ddiv", GPR32Opnd>;
+class DMOD_MM64R6_DESC : ArithLogicR<"dmod", GPR32Opnd>;
+class DDIVU_MM64R6_DESC : ArithLogicR<"ddivu", GPR32Opnd>;
+class DMODU_MM64R6_DESC : ArithLogicR<"dmodu", GPR32Opnd>;
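+// Worked encoding sketch, combining POOL32A_DIVMOD_FM_MMR6 from the formats
+// file with the ddiv funct value 0b100011000 above, and assuming the generic
+// ArithLogicR "$rd, $rs, $rt" operand order: "ddiv $1, $2, $3" encodes as
+//   0b010110 << 26 | 1 << 21 | 2 << 16 | 3 << 11 | 0b100011000 = 0x58221918.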
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Definitions
+//
+//===----------------------------------------------------------------------===//
+
+let DecoderNamespace = "MicroMipsR6" in {
+ def DAUI_MM64R6 : StdMMR6Rel, DAUI_MMR6_DESC, DAUI_MMR6_ENC, ISA_MICROMIPS64R6;
+ def DAHI_MM64R6 : StdMMR6Rel, DAHI_MMR6_DESC, DAHI_MMR6_ENC, ISA_MICROMIPS64R6;
+ def DATI_MM64R6 : StdMMR6Rel, DATI_MMR6_DESC, DATI_MMR6_ENC, ISA_MICROMIPS64R6;
+ def DEXT_MM64R6 : StdMMR6Rel, DEXT_MMR6_DESC, DEXT_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DEXTM_MM64R6 : StdMMR6Rel, DEXTM_MMR6_DESC, DEXTM_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DEXTU_MM64R6 : StdMMR6Rel, DEXTU_MMR6_DESC, DEXTU_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DALIGN_MM64R6 : StdMMR6Rel, DALIGN_MMR6_DESC, DALIGN_MMR6_ENC,
+ ISA_MICROMIPS64R6;
+ def DDIV_MM64R6 : R6MMR6Rel, DDIV_MM64R6_DESC, DDIV_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+ def DMOD_MM64R6 : R6MMR6Rel, DMOD_MM64R6_DESC, DMOD_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+ def DDIVU_MM64R6 : R6MMR6Rel, DDIVU_MM64R6_DESC, DDIVU_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+ def DMODU_MM64R6 : R6MMR6Rel, DMODU_MM64R6_DESC, DMODU_MM64R6_ENC,
+ ISA_MICROMIPS64R6;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td
new file mode 100644
index 0000000..f11c09a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrFormats.td
@@ -0,0 +1,244 @@
+//===-- MicroMipsDSPInstrFormats.td - Instruction Formats --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class MMDSPInst<string opstr = "">
+ : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+ let AdditionalPredicates = [InMicroMips];
+ string BaseOpcode = opstr;
+ string Arch = "mmdsp";
+ let DecoderNamespace = "MicroMips";
+}
+
+class MMDSPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+ let AdditionalPredicates = [InMicroMips];
+}
+
+class POOL32A_3R_FMT<string opstr, bits<11> op> : MMDSPInst<opstr> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10-0} = op;
+}
+
+class POOL32A_2R_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_2RAC_FMT<string opstr, bits<8> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_3RB0_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0b0;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_2RSA4_FMT<string opstr, bits<12> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<4> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = sa;
+ let Inst{11-0} = op;
+}
+
+class POOL32A_2RSA3_FMT<string opstr, bits<7> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<3> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-13} = sa;
+ let Inst{12-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_2RSA5B0_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = sa;
+ let Inst{10} = 0b0;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_2RSA4B0_FMT<string opstr, bits<11> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<4> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = sa;
+ let Inst{11} = 0b0;
+ let Inst{10-0} = op;
+}
+
+class POOL32A_2RSA4OP6_FMT<string opstr, bits<6> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<4> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-12} = sa;
+ let Inst{11-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_1RIMM5AC_FMT<string opstr, bits<8> funct> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> imm;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = imm;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_2RSA5_FMT<string opstr, bits<11> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> sa;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = sa;
+ let Inst{10-0} = op;
+}
+
+class POOL32A_1RMEMB0_FMT<string opstr, bits<10> funct> : MMDSPInst<opstr> {
+ bits<5> index;
+ bits<5> base;
+ bits<5> rd;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = index;
+ let Inst{20-16} = base;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0b0;
+ let Inst{9-0} = funct;
+}
+
+class POOL32A_1RAC_FMT<string instr_asm, bits<8> funct> : MMDSPInst<instr_asm> {
+ bits<5> rs;
+ bits<2> ac;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = 0;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_1RMASK7_FMT<string opstr, bits<8> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<7> mask;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-14} = mask;
+ let Inst{13-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_1RIMM10_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<5> rd;
+ bits<10> imm;
+
+ let Inst{31-26} = 0;
+ let Inst{25-16} = imm;
+ let Inst{15-11} = rd;
+ let Inst{10} = 0;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_1RIMM8_FMT<string opstr, bits<6> op> : MMDSPInst<opstr> {
+ bits<5> rt;
+ bits<8> imm;
+
+ let Inst{31-26} = 0;
+ let Inst{25-21} = rt;
+ let Inst{20-13} = imm;
+ let Inst{12} = 0;
+ let Inst{11-6} = op;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_4B0SHIFT6AC4B0_FMT<string opstr, bits<10> op> : MMDSPInst<opstr> {
+ bits<6> shift;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-22} = 0b0000;
+ let Inst{21-16} = shift;
+ let Inst{15-14} = ac;
+ let Inst{13-10} = 0b0000;
+ let Inst{9-0} = op;
+}
+
+class POOL32A_5B01RAC_FMT<string opstr, bits<8> op> : MMDSPInst<opstr> {
+ bits<5> rs;
+ bits<2> ac;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = 0b00000;
+ let Inst{20-16} = rs;
+ let Inst{15-14} = ac;
+ let Inst{13-6} = op;
+ let Inst{5-0} = 0b111100;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
new file mode 100644
index 0000000..b342e23
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -0,0 +1,528 @@
+//===- MicroMipsDSPInstrInfo.td - MicroMips DSP instructions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes MicroMips DSP instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// Instruction encoding.
+class ADDQ_PH_MM_ENC : POOL32A_3R_FMT<"addq.ph", 0b00000001101>;
+class ADDQ_S_PH_MM_ENC : POOL32A_3R_FMT<"addq_s.ph", 0b10000001101>;
+class ADDQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"addq_s.w", 0b1100000101>;
+class ADDQH_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh.ph", 0b00001001101>;
+class ADDQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.ph", 0b10001001101>;
+class ADDQH_W_MMR2_ENC : POOL32A_3R_FMT<"addqh.w", 0b00010001101>;
+class ADDQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.w", 0b10010001101>;
+class ADDU_PH_MMR2_ENC : POOL32A_3R_FMT<"addu.ph", 0b00100001101>;
+class ADDU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"addu_s.ph", 0b10100001101>;
+class ADDU_QB_MM_ENC : POOL32A_3R_FMT<"addu.qb", 0b00011001101>;
+class ADDU_S_QB_MM_ENC : POOL32A_3R_FMT<"addu_s.qb", 0b10011001101>;
+class ADDUH_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh.qb", 0b00101001101>;
+class ADDUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh_r.qb", 0b10101001101>;
+class ADDSC_MM_ENC : POOL32A_3RB0_FMT<"addsc", 0b1110000101>;
+class ADDWC_MM_ENC : POOL32A_3RB0_FMT<"addwc", 0b1111000101>;
+class DPA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpa.w.ph", 0b00000010>;
+class DPAQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpaq_s.w.ph", 0b00001010>;
+class DPAQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpaq_sa.l.w", 0b01001010>;
+class DPAQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_s.w.ph", 0b10001010>;
+class DPAQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_sa.w.ph", 0b11001010>;
+class DPAU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbl", 0b10000010>;
+class DPAU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbr", 0b11000010>;
+class DPAX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpax.w.ph", 0b01000010>;
+class ABSQ_S_PH_MM_ENC : POOL32A_2R_FMT<"absq_s.ph", 0b0001000100>;
+class ABSQ_S_W_MM_ENC : POOL32A_2R_FMT<"absq_s.w", 0b0010000100>;
+class ABSQ_S_QB_MMR2_ENC : POOL32A_2R_FMT<"absq_s.qb", 0b0000000100>;
+class INSV_MM_ENC : POOL32A_2R_FMT<"insv", 0b0100000100>;
+class MADD_DSP_MM_ENC : POOL32A_2RAC_FMT<"madd", 0b00101010>;
+class MADDU_DSP_MM_ENC : POOL32A_2RAC_FMT<"maddu", 0b01101010>;
+class MSUB_DSP_MM_ENC : POOL32A_2RAC_FMT<"msub", 0b10101010>;
+class MSUBU_DSP_MM_ENC : POOL32A_2RAC_FMT<"msubu", 0b11101010>;
+class MULT_DSP_MM_ENC : POOL32A_2RAC_FMT<"mult", 0b00110010>;
+class MULTU_DSP_MM_ENC : POOL32A_2RAC_FMT<"multu", 0b01110010>;
+class SHLL_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll.ph", 0b001110110101>;
+class SHLL_S_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll_s.ph", 0b101110110101>;
+class SHLL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shll.qb", 0b0100001>;
+class SHLLV_PH_MM_ENC : POOL32A_3R_FMT<"shllv.ph", 0b00000001110>;
+class SHLLV_S_PH_MM_ENC : POOL32A_3R_FMT<"shllv_s.ph", 0b10000001110>;
+class SHLLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shllv.qb", 0b1110010101>;
+class SHLLV_S_W_MM_ENC : POOL32A_3RB0_FMT<"shllv_s.w", 0b1111010101>;
+class SHLL_S_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shll_s.w", 0b1111110101>;
+class SHRA_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra.qb", 0b0000111>;
+class SHRA_R_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra_r.qb", 0b1000111>;
+class SHRA_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra.ph", 0b01100110101>;
+class SHRA_R_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra_r.ph", 0b11100110101>;
+class SHRAV_PH_MM_ENC : POOL32A_3R_FMT<"shrav.ph", 0b00110001101>;
+class SHRAV_R_PH_MM_ENC : POOL32A_3R_FMT<"shrav_r.ph", 0b10110001101>;
+class SHRAV_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav.qb", 0b00111001101>;
+class SHRAV_R_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav_r.qb", 0b10111001101>;
+class SHRAV_R_W_MM_ENC : POOL32A_3RB0_FMT<"shrav_r.w", 0b1011010101>;
+class SHRA_R_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shra_r.w", 0b1011110101>;
+class SHRL_PH_MMR2_ENC : POOL32A_2RSA4OP6_FMT<"shrl.ph", 0b001111>;
+class SHRL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shrl.qb", 0b1100001>;
+class SHRLV_PH_MMR2_ENC : POOL32A_3RB0_FMT<"shrlv.ph", 0b1100010101>;
+class SHRLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shrlv.qb", 0b1101010101>;
+class PRECEQ_W_PHL_MM_ENC : POOL32A_2R_FMT<"preceq.w.phl", 0b0101000100>;
+class PRECEQ_W_PHR_MM_ENC : POOL32A_2R_FMT<"preceq.w.phr", 0b0110000100>;
+class PRECEQU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbl", 0b0111000100>;
+class PRECEQU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbla", 0b0111001100>;
+class PRECEQU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbr", 0b1001000100>;
+class PRECEQU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbra", 0b1001001100>;
+class PRECEU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbl", 0b1011000100>;
+class PRECEU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbla", 0b1011001100>;
+class PRECEU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbr", 0b1101000100>;
+class PRECEU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbra", 0b1101001100>;
+class SUBQ_PH_MM_ENC : POOL32A_3R_FMT<"subq.ph", 0b01000001101>;
+class SUBQ_S_PH_MM_ENC : POOL32A_3R_FMT<"subq_s.ph", 0b11000001101>;
+class SUBQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"subq_s.w", 0b1101000101>;
+class SUBQH_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh.ph", 0b01001001101>;
+class SUBQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.ph", 0b11001001101>;
+class SUBQH_W_MMR2_ENC : POOL32A_3R_FMT<"subqh.w", 0b01010001101>;
+class SUBQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.w", 0b11010001101>;
+class SUBU_PH_MMR2_ENC : POOL32A_3R_FMT<"subu.ph", 0b01100001101>;
+class SUBU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"subu_s.ph", 0b11100001101>;
+class SUBU_QB_MM_ENC : POOL32A_3R_FMT<"subu.qb", 0b01011001101>;
+class SUBU_S_QB_MM_ENC : POOL32A_3R_FMT<"subu_s.qb", 0b11011001101>;
+class SUBUH_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh.qb", 0b01101001101>;
+class SUBUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh_r.qb", 0b11101001101>;
+class EXTP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extp", 0b10011001>;
+class EXTPDP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extpdp", 0b11011001>;
+class EXTPDPV_MM_ENC : POOL32A_2RAC_FMT<"extpdpv", 0b11100010>;
+class EXTPV_MM_ENC : POOL32A_2RAC_FMT<"extpv", 0b10100010>;
+class EXTR_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr.w", 0b00111001>;
+class EXTR_R_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_r.w", 0b01111001>;
+class EXTR_RS_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_rs.w", 0b10111001>;
+class EXTR_S_H_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_s.h", 0b11111001>;
+class EXTRV_W_MM_ENC : POOL32A_2RAC_FMT<"extrv.w", 0b00111010>;
+class EXTRV_R_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_r.w", 0b01111010>;
+class EXTRV_RS_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_rs.w", 0b10111010>;
+class EXTRV_S_H_MM_ENC : POOL32A_2RAC_FMT<"extrv_s.h", 0b11111010>;
+class DPS_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dps.w.ph", 0b00010010>;
+class DPSQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpsq_s.w.ph", 0b00011010>;
+class DPSQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpsq_sa.l.w", 0b01011010>;
+class DPSQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_s.w.ph", 0b10011010>;
+class DPSQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_sa.w.ph", 0b11011010>;
+class DPSU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbl", 0b10010010>;
+class DPSU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbr", 0b11010010>;
+class DPSX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsx.w.ph", 0b01010010>;
+class MUL_PH_MMR2_ENC : POOL32A_3R_FMT<"mul.ph", 0b00000101101>;
+class MUL_S_PH_MMR2_ENC : POOL32A_3R_FMT<"mul_s.ph", 0b10000101101>;
+class MULEQ_S_W_PHL_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phl", 0b0000100101>;
+class MULEQ_S_W_PHR_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phr", 0b0001100101>;
+class MULEU_S_PH_QBL_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbl", 0b0010010101>;
+class MULEU_S_PH_QBR_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbr", 0b0011010101>;
+class MULQ_RS_PH_MM_ENC : POOL32A_3RB0_FMT<"mulq_rs.ph", 0b0100010101>;
+class MULQ_RS_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_rs.w", 0b0110010101>;
+class MULQ_S_PH_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.ph", 0b0101010101>;
+class MULQ_S_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.w", 0b0111010101>;
+class PRECR_QB_PH_MMR2_ENC : POOL32A_3RB0_FMT<"precr.qb.ph", 0b0001101101>;
+class PRECR_SRA_PH_W_MMR2_ENC
+ : POOL32A_2RSA5_FMT<"precr_sra.ph.w", 0b01111001101>;
+class PRECR_SRA_R_PH_W_MMR2_ENC
+ : POOL32A_2RSA5_FMT<"precr_sra_r.ph.w", 0b11111001101>;
+class PRECRQ_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq.ph.w", 0b0011101101>;
+class PRECRQ_QB_PH_MM_ENC : POOL32A_3RB0_FMT<"precrq.qb.ph", 0b0010101101>;
+class PRECRQU_S_QB_PH_MM_ENC
+ : POOL32A_3RB0_FMT<"precrqu_s.qb.ph", 0b0101101101>;
+class PRECRQ_RS_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq_rs.ph.w", 0b0100101101>;
+class LBUX_MM_ENC : POOL32A_1RMEMB0_FMT<"lbux", 0b1000100101>;
+class LHX_MM_ENC : POOL32A_1RMEMB0_FMT<"lhx", 0b0101100101>;
+class LWX_MM_ENC : POOL32A_1RMEMB0_FMT<"lwx", 0b0110100101>;
+class MAQ_S_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phl", 0b01101001>;
+class MAQ_SA_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phl", 0b11101001>;
+class MAQ_S_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phr", 0b00101001>;
+class MAQ_SA_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phr", 0b10101001>;
+class MFHI_MM_ENC : POOL32A_1RAC_FMT<"mfhi", 0b00000001>;
+class MFLO_MM_ENC : POOL32A_1RAC_FMT<"mflo", 0b01000001>;
+class MTHI_MM_ENC : POOL32A_1RAC_FMT<"mthi", 0b10000001>;
+class MTLO_MM_ENC : POOL32A_1RAC_FMT<"mtlo", 0b11000001>;
+class PREPEND_MMR2_ENC : POOL32A_2RSA5B0_FMT<"prepend", 0b1001010101>;
+class RADDU_W_QB_MM_ENC : POOL32A_2R_FMT<"raddu.w.qb", 0b1111000100>;
+class RDDSP_MM_ENC : POOL32A_1RMASK7_FMT<"rddsp", 0b00011001>;
+class REPL_PH_MM_ENC : POOL32A_1RIMM10_FMT<"repl.ph", 0b0000111101>;
+class REPL_QB_MM_ENC : POOL32A_1RIMM8_FMT<"repl.qb", 0b010111>;
+class REPLV_PH_MM_ENC : POOL32A_2R_FMT<"replv.ph", 0b0000001100>;
+class REPLV_QB_MM_ENC : POOL32A_2R_FMT<"replv.qb", 0b0001001100>;
+class MTHLIP_MM_ENC : POOL32A_1RAC_FMT<"mthlip", 0b00001001>;
+class PACKRL_PH_MM_ENC : POOL32A_3RB0_FMT<"packrl.ph", 0b0110101101>;
+class PICK_PH_MM_ENC : POOL32A_3RB0_FMT<"pick.ph", 0b1000101101>;
+class PICK_QB_MM_ENC : POOL32A_3RB0_FMT<"pick.qb", 0b0111101101>;
+class SHILO_MM_ENC : POOL32A_4B0SHIFT6AC4B0_FMT<"shilo", 0b0000011101>;
+class SHILOV_MM_ENC : POOL32A_5B01RAC_FMT<"shilov", 0b01001001>;
+class WRDSP_MM_ENC : POOL32A_1RMASK7_FMT<"wrdsp", 0b01011001>;
+
+// Instruction desc.
+class ABSQ_S_PH_MM_R2_DESC_BASE<string opstr, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterOperand ROD,
+ RegisterOperand ROS = ROD> {
+ dag OutOperandList = (outs ROD:$rt);
+ dag InOperandList = (ins ROS:$rs);
+ string AsmString = !strconcat(opstr, "\t$rt, $rs");
+ list<dag> Pattern = [(set ROD:$rt, (OpNode ROS:$rs))];
+ InstrItinClass Itinerary = itin;
+}
+class ABSQ_S_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "absq_s.ph", int_mips_absq_s_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_W_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "absq_s.w", int_mips_absq_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_QB_MMR2_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "absq_s.qb", int_mips_absq_s_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class PRECEQ_W_PHL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceq.w.phl", int_mips_preceq_w_phl, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQ_W_PHR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceq.w.phr", int_mips_preceq_w_phr, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbl", int_mips_precequ_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbla", int_mips_precequ_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbr", int_mips_precequ_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "precequ.ph.qbra", int_mips_precequ_ph_qbra, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbl", int_mips_preceu_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbla", int_mips_preceu_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbr", int_mips_preceu_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+ "preceu.ph.qbra", int_mips_preceu_ph_qbra, NoItinerary, DSPROpnd>;
+
+class SHLL_R2_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ SDPatternOperator ImmPat, InstrItinClass itin,
+ RegisterOperand RO, Operand ImmOpnd> {
+ dag OutOperandList = (outs RO:$rt);
+ dag InOperandList = (ins RO:$rs, ImmOpnd:$sa);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+ list<dag> Pattern = [(set RO:$rt, (OpNode RO:$rs, ImmPat:$sa))];
+ InstrItinClass Itinerary = itin;
+ bit hasSideEffects = 1;
+}
+class SHLL_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+ Defs<[DSPOutFlag22]>;
+class SHLL_S_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll_s.ph", int_mips_shll_s_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+ Defs<[DSPOutFlag22]>;
+class SHLL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>,
+ Defs<[DSPOutFlag22]>;
+class SHLL_S_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shll_s.w", int_mips_shll_s_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>,
+ Defs<[DSPOutFlag22]>;
+class SHRA_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_R_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra_r.qb", int_mips_shra_r_qb, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra_r.ph", int_mips_shra_r_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shra_r.w", int_mips_shra_r_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>;
+class SHRL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+ "shrl.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRL_PH_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+ "shrl.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+
+class SHLLV_R3_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin, RegisterOperand RO> {
+ dag OutOperandList = (outs RO:$rd);
+ dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs");
+ list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs))];
+ InstrItinClass Itinerary = itin;
+}
+class SHLLV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv.ph", int_mips_shll_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv_s.ph", int_mips_shll_s_ph, NoItinerary, DSPROpnd>,
+ Defs<[DSPOutFlag22]>;
+class SHLLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv.qb", int_mips_shll_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shllv_s.w", int_mips_shll_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag22]>;
+class SHRAV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav.ph", int_mips_shra_ph, NoItinerary, DSPROpnd>;
+class SHRAV_R_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav_r.ph", int_mips_shra_r_ph, NoItinerary, DSPROpnd>;
+class SHRAV_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav.qb", int_mips_shra_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrav_r.w", int_mips_shra_r_w, NoItinerary, GPR32Opnd>;
+class SHRLV_PH_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>;
+class SHRLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+ "shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>;
+
+class EXT_MM_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $rs");
+ InstrItinClass Itinerary = itin;
+}
+class EXT_MM_1R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $imm");
+ InstrItinClass Itinerary = itin;
+}
+
+class EXTP_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTPDP_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPDPV_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extpdpv", MipsEXTPDP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPV_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+ Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTR_W_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTR_R_W_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr_r.w", MipsEXTR_R_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTR_RS_W_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTR_S_H_MM_DESC
+ : EXT_MM_1R_DESC_BASE<"extr_s.h", MipsEXTR_S_H, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_W_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv.w", MipsEXTR_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_R_W_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_RS_W_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_S_H_MM_DESC
+ : EXT_MM_2R_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>,
+ Defs<[DSPOutFlag23]>;
+
+class MFHI_MM_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
+ InstrItinClass itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rs);
+ dag InOperandList = (ins RO:$ac);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+ list<dag> Pattern = [(set GPR32Opnd:$rs, (OpNode RO:$ac))];
+ InstrItinClass Itinerary = itin;
+}
+
+class MFHI_MM_DESC : MFHI_MM_DESC_BASE<"mfhi", ACC64DSPOpnd, MipsMFHI,
+ NoItinerary>;
+class MFLO_MM_DESC : MFHI_MM_DESC_BASE<"mflo", ACC64DSPOpnd, MipsMFLO,
+ NoItinerary>;
+
+class RADDU_W_QB_MM_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins DSPROpnd:$rs);
+ string AsmString = !strconcat("raddu.w.qb", "\t$rt, $rs");
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_raddu_w_qb DSPROpnd:$rs))];
+ InstrItinClass Itinerary = NoItinerary;
+ string BaseOpcode = "raddu.w.qb";
+}
+
+class RDDSP_MM_DESC {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins uimm16:$mask);
+ string AsmString = !strconcat("rddsp", "\t$rt, $mask");
+ list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt10:$mask))];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPL_QB_MM_DESC {
+ dag OutOperandList = (outs DSPROpnd:$rt);
+ dag InOperandList = (ins uimm16:$imm);
+ string AsmString = !strconcat("repl.qb", "\t$rt, $imm");
+ list<dag> Pattern = [(set DSPROpnd:$rt, (int_mips_repl_qb immZExt8:$imm))];
+ InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPLV_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
+class REPLV_QB_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+ NoItinerary, DSPROpnd,
+ GPR32Opnd>;
+
+class WRDSP_MM_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
+ string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
+ list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
+ InstrItinClass Itinerary = NoItinerary;
+}
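+// Usage sketch: "wrdsp $4, 0x1F" copies the DSPControl fields selected by the
+// 7-bit mask from $4; the alias at the end of this file lets a bare
+// "wrdsp $4" default to mask 0x1F.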
+
+// Instruction defs.
+// microMIPS DSP Rev 1
+def ADDQ_PH_MM : DspMMRel, ADDQ_PH_MM_ENC, ADDQ_PH_DESC;
+def ADDQ_S_PH_MM : DspMMRel, ADDQ_S_PH_MM_ENC, ADDQ_S_PH_DESC;
+def ADDQ_S_W_MM : DspMMRel, ADDQ_S_W_MM_ENC, ADDQ_S_W_DESC;
+def ADDU_QB_MM : DspMMRel, ADDU_QB_MM_ENC, ADDU_QB_DESC;
+def ADDU_S_QB_MM : DspMMRel, ADDU_S_QB_MM_ENC, ADDU_S_QB_DESC;
+def ADDSC_MM : DspMMRel, ADDSC_MM_ENC, ADDSC_DESC;
+def ADDWC_MM : DspMMRel, ADDWC_MM_ENC, ADDWC_DESC;
+def DPAQ_S_W_PH_MM : DspMMRel, DPAQ_S_W_PH_MM_ENC, DPAQ_S_W_PH_DESC;
+def DPAQ_SA_L_W_MM : DspMMRel, DPAQ_SA_L_W_MM_ENC, DPAQ_SA_L_W_DESC;
+def DPAU_H_QBL_MM : DspMMRel, DPAU_H_QBL_MM_ENC, DPAU_H_QBL_DESC;
+def DPAU_H_QBR_MM : DspMMRel, DPAU_H_QBR_MM_ENC, DPAU_H_QBR_DESC;
+def ABSQ_S_PH_MM : DspMMRel, ABSQ_S_PH_MM_ENC, ABSQ_S_PH_MM_DESC;
+def ABSQ_S_W_MM : DspMMRel, ABSQ_S_W_MM_ENC, ABSQ_S_W_MM_DESC;
+def INSV_MM : DspMMRel, INSV_MM_ENC, INSV_DESC;
+def MADD_DSP_MM : DspMMRel, MADD_DSP_MM_ENC, MADD_DSP_DESC;
+def MADDU_DSP_MM : DspMMRel, MADDU_DSP_MM_ENC, MADDU_DSP_DESC;
+def MSUB_DSP_MM : DspMMRel, MSUB_DSP_MM_ENC, MSUB_DSP_DESC;
+def MSUBU_DSP_MM : DspMMRel, MSUBU_DSP_MM_ENC, MSUBU_DSP_DESC;
+def MULT_DSP_MM : DspMMRel, MULT_DSP_MM_ENC, MULT_DSP_DESC;
+def MULTU_DSP_MM : DspMMRel, MULTU_DSP_MM_ENC, MULTU_DSP_DESC;
+def SHLL_PH_MM : DspMMRel, SHLL_PH_MM_ENC, SHLL_PH_MM_DESC;
+def SHLL_S_PH_MM : DspMMRel, SHLL_S_PH_MM_ENC, SHLL_S_PH_MM_DESC;
+def SHLL_QB_MM : DspMMRel, SHLL_QB_MM_ENC, SHLL_QB_MM_DESC;
+def SHLLV_PH_MM : DspMMRel, SHLLV_PH_MM_ENC, SHLLV_PH_MM_DESC;
+def SHLLV_S_PH_MM : DspMMRel, SHLLV_S_PH_MM_ENC, SHLLV_S_PH_MM_DESC;
+def SHLLV_QB_MM : DspMMRel, SHLLV_QB_MM_ENC, SHLLV_QB_MM_DESC;
+def SHLLV_S_W_MM : DspMMRel, SHLLV_S_W_MM_ENC, SHLLV_S_W_MM_DESC;
+def SHLL_S_W_MM : DspMMRel, SHLL_S_W_MM_ENC, SHLL_S_W_MM_DESC;
+def SHRA_PH_MM : DspMMRel, SHRA_PH_MM_ENC, SHRA_PH_MM_DESC;
+def SHRA_R_PH_MM : DspMMRel, SHRA_R_PH_MM_ENC, SHRA_R_PH_MM_DESC;
+def SHRAV_PH_MM : DspMMRel, SHRAV_PH_MM_ENC, SHRAV_PH_MM_DESC;
+def SHRAV_R_PH_MM : DspMMRel, SHRAV_R_PH_MM_ENC, SHRAV_R_PH_MM_DESC;
+def SHRAV_R_W_MM : DspMMRel, SHRAV_R_W_MM_ENC, SHRAV_R_W_MM_DESC;
+def SHRA_R_W_MM : DspMMRel, SHRA_R_W_MM_ENC, SHRA_R_W_MM_DESC;
+def SHRL_QB_MM : DspMMRel, SHRL_QB_MM_ENC, SHRL_QB_MM_DESC;
+def SHRLV_QB_MM : DspMMRel, SHRLV_QB_MM_ENC, SHRLV_QB_MM_DESC;
+def PRECEQ_W_PHL_MM : DspMMRel, PRECEQ_W_PHL_MM_ENC, PRECEQ_W_PHL_MM_DESC;
+def PRECEQ_W_PHR_MM : DspMMRel, PRECEQ_W_PHR_MM_ENC, PRECEQ_W_PHR_MM_DESC;
+def PRECEQU_PH_QBL_MM : DspMMRel, PRECEQU_PH_QBL_MM_ENC, PRECEQU_PH_QBL_MM_DESC;
+def PRECEQU_PH_QBLA_MM : DspMMRel, PRECEQU_PH_QBLA_MM_ENC,
+ PRECEQU_PH_QBLA_MM_DESC;
+def PRECEQU_PH_QBR_MM : DspMMRel, PRECEQU_PH_QBR_MM_ENC, PRECEQU_PH_QBR_MM_DESC;
+def PRECEQU_PH_QBRA_MM : DspMMRel, PRECEQU_PH_QBRA_MM_ENC,
+ PRECEQU_PH_QBRA_MM_DESC;
+def PRECEU_PH_QBL_MM : DspMMRel, PRECEU_PH_QBL_MM_ENC, PRECEU_PH_QBL_MM_DESC;
+def PRECEU_PH_QBLA_MM : DspMMRel, PRECEU_PH_QBLA_MM_ENC, PRECEU_PH_QBLA_MM_DESC;
+def PRECEU_PH_QBR_MM : DspMMRel, PRECEU_PH_QBR_MM_ENC, PRECEU_PH_QBR_MM_DESC;
+def PRECEU_PH_QBRA_MM : DspMMRel, PRECEU_PH_QBRA_MM_ENC, PRECEU_PH_QBRA_MM_DESC;
+def SUBQ_PH_MM : DspMMRel, SUBQ_PH_MM_ENC, SUBQ_PH_DESC;
+def SUBQ_S_PH_MM : DspMMRel, SUBQ_S_PH_MM_ENC, SUBQ_S_PH_DESC;
+def SUBQ_S_W_MM : DspMMRel, SUBQ_S_W_MM_ENC, SUBQ_S_W_DESC;
+def SUBU_QB_MM : DspMMRel, SUBU_QB_MM_ENC, SUBU_QB_DESC;
+def SUBU_S_QB_MM : DspMMRel, SUBU_S_QB_MM_ENC, SUBU_S_QB_DESC;
+def EXTP_MM : DspMMRel, EXTP_MM_ENC, EXTP_MM_DESC;
+def EXTPDP_MM : DspMMRel, EXTPDP_MM_ENC, EXTPDP_MM_DESC;
+def EXTPDPV_MM : DspMMRel, EXTPDPV_MM_ENC, EXTPDPV_MM_DESC;
+def EXTPV_MM : DspMMRel, EXTPV_MM_ENC, EXTPV_MM_DESC;
+def EXTR_W_MM : DspMMRel, EXTR_W_MM_ENC, EXTR_W_MM_DESC;
+def EXTR_R_W_MM : DspMMRel, EXTR_R_W_MM_ENC, EXTR_R_W_MM_DESC;
+def EXTR_RS_W_MM : DspMMRel, EXTR_RS_W_MM_ENC, EXTR_RS_W_MM_DESC;
+def EXTR_S_H_MM : DspMMRel, EXTR_S_H_MM_ENC, EXTR_S_H_MM_DESC;
+def EXTRV_W_MM : DspMMRel, EXTRV_W_MM_ENC, EXTRV_W_MM_DESC;
+def EXTRV_R_W_MM : DspMMRel, EXTRV_R_W_MM_ENC, EXTRV_R_W_MM_DESC;
+def EXTRV_RS_W_MM : DspMMRel, EXTRV_RS_W_MM_ENC, EXTRV_RS_W_MM_DESC;
+def EXTRV_S_H_MM : DspMMRel, EXTRV_S_H_MM_ENC, EXTRV_S_H_MM_DESC;
+def DPSQ_S_W_PH_MM : DspMMRel, DPSQ_S_W_PH_MM_ENC, DPSQ_S_W_PH_DESC;
+def DPSQ_SA_L_W_MM : DspMMRel, DPSQ_SA_L_W_MM_ENC, DPSQ_SA_L_W_DESC;
+def DPSU_H_QBL_MM : DspMMRel, DPSU_H_QBL_MM_ENC, DPSU_H_QBL_DESC;
+def DPSU_H_QBR_MM : DspMMRel, DPSU_H_QBR_MM_ENC, DPSU_H_QBR_DESC;
+def MULEQ_S_W_PHL_MM : DspMMRel, MULEQ_S_W_PHL_MM_ENC, MULEQ_S_W_PHL_DESC;
+def MULEQ_S_W_PHR_MM : DspMMRel, MULEQ_S_W_PHR_MM_ENC, MULEQ_S_W_PHR_DESC;
+def MULEU_S_PH_QBL_MM : DspMMRel, MULEU_S_PH_QBL_MM_ENC, MULEU_S_PH_QBL_DESC;
+def MULEU_S_PH_QBR_MM : DspMMRel, MULEU_S_PH_QBR_MM_ENC, MULEU_S_PH_QBR_DESC;
+def MULQ_RS_PH_MM : DspMMRel, MULQ_RS_PH_MM_ENC, MULQ_RS_PH_DESC;
+def PRECRQ_PH_W_MM : DspMMRel, PRECRQ_PH_W_MM_ENC, PRECRQ_PH_W_DESC;
+def PRECRQ_QB_PH_MM : DspMMRel, PRECRQ_QB_PH_MM_ENC, PRECRQ_QB_PH_DESC;
+def PRECRQU_S_QB_PH_MM : DspMMRel, PRECRQU_S_QB_PH_MM_ENC, PRECRQU_S_QB_PH_DESC;
+def PRECRQ_RS_PH_W_MM : DspMMRel, PRECRQ_RS_PH_W_MM_ENC, PRECRQ_RS_PH_W_DESC;
+def LBUX_MM : DspMMRel, LBUX_MM_ENC, LBUX_DESC;
+def LHX_MM : DspMMRel, LHX_MM_ENC, LHX_DESC;
+def LWX_MM : DspMMRel, LWX_MM_ENC, LWX_DESC;
+def MAQ_S_W_PHL_MM : DspMMRel, MAQ_S_W_PHL_MM_ENC, MAQ_S_W_PHL_DESC;
+def MAQ_SA_W_PHL_MM : DspMMRel, MAQ_SA_W_PHL_MM_ENC, MAQ_SA_W_PHL_DESC;
+def MAQ_S_W_PHR_MM : DspMMRel, MAQ_S_W_PHR_MM_ENC, MAQ_S_W_PHR_DESC;
+def MAQ_SA_W_PHR_MM : DspMMRel, MAQ_SA_W_PHR_MM_ENC, MAQ_SA_W_PHR_DESC;
+def MFHI_DSP_MM : DspMMRel, MFHI_MM_ENC, MFHI_MM_DESC;
+def MFLO_DSP_MM : DspMMRel, MFLO_MM_ENC, MFLO_MM_DESC;
+def MTHI_DSP_MM : DspMMRel, MTHI_MM_ENC, MTHI_DESC;
+def MTLO_DSP_MM : DspMMRel, MTLO_MM_ENC, MTLO_DESC;
+def RADDU_W_QB_MM : DspMMRel, RADDU_W_QB_MM_ENC, RADDU_W_QB_MM_DESC;
+def RDDSP_MM : DspMMRel, RDDSP_MM_ENC, RDDSP_MM_DESC;
+def REPL_PH_MM : DspMMRel, REPL_PH_MM_ENC, REPL_PH_DESC;
+def REPL_QB_MM : DspMMRel, REPL_QB_MM_ENC, REPL_QB_MM_DESC;
+def REPLV_PH_MM : DspMMRel, REPLV_PH_MM_ENC, REPLV_PH_MM_DESC;
+def REPLV_QB_MM : DspMMRel, REPLV_QB_MM_ENC, REPLV_QB_MM_DESC;
+def MTHLIP_MM : DspMMRel, MTHLIP_MM_ENC, MTHLIP_DESC;
+def PACKRL_PH_MM : DspMMRel, PACKRL_PH_MM_ENC, PACKRL_PH_DESC;
+def PICK_PH_MM : DspMMRel, PICK_PH_MM_ENC, PICK_PH_DESC;
+def PICK_QB_MM : DspMMRel, PICK_QB_MM_ENC, PICK_QB_DESC;
+def SHILO_MM : DspMMRel, SHILO_MM_ENC, SHILO_DESC;
+def SHILOV_MM : DspMMRel, SHILOV_MM_ENC, SHILOV_DESC;
+def WRDSP_MM : DspMMRel, WRDSP_MM_ENC, WRDSP_MM_DESC;
+// microMIPS DSP Rev 2
+def ABSQ_S_QB_MMR2 : DspMMRel, ABSQ_S_QB_MMR2_ENC, ABSQ_S_QB_MMR2_DESC,
+ ISA_DSPR2;
+def ADDQH_PH_MMR2 : DspMMRel, ADDQH_PH_MMR2_ENC, ADDQH_PH_DESC, ISA_DSPR2;
+def ADDQH_R_PH_MMR2 : DspMMRel, ADDQH_R_PH_MMR2_ENC, ADDQH_R_PH_DESC, ISA_DSPR2;
+def ADDQH_W_MMR2 : DspMMRel, ADDQH_W_MMR2_ENC, ADDQH_W_DESC, ISA_DSPR2;
+def ADDQH_R_W_MMR2 : DspMMRel, ADDQH_R_W_MMR2_ENC, ADDQH_R_W_DESC, ISA_DSPR2;
+def ADDU_PH_MMR2 : DspMMRel, ADDU_PH_MMR2_ENC, ADDU_PH_DESC, ISA_DSPR2;
+def ADDU_S_PH_MMR2 : DspMMRel, ADDU_S_PH_MMR2_ENC, ADDU_S_PH_DESC, ISA_DSPR2;
+def ADDUH_QB_MMR2 : DspMMRel, ADDUH_QB_MMR2_ENC, ADDUH_QB_DESC, ISA_DSPR2;
+def ADDUH_R_QB_MMR2 : DspMMRel, ADDUH_R_QB_MMR2_ENC, ADDUH_R_QB_DESC, ISA_DSPR2;
+def DPA_W_PH_MMR2 : DspMMRel, DPA_W_PH_MMR2_ENC, DPA_W_PH_DESC, ISA_DSPR2;
+def DPAQX_S_W_PH_MMR2 : DspMMRel, DPAQX_S_W_PH_MMR2_ENC, DPAQX_S_W_PH_DESC,
+ ISA_DSPR2;
+def DPAQX_SA_W_PH_MMR2 : DspMMRel, DPAQX_SA_W_PH_MMR2_ENC, DPAQX_SA_W_PH_DESC,
+ ISA_DSPR2;
+def DPAX_W_PH_MMR2 : DspMMRel, DPAX_W_PH_MMR2_ENC, DPAX_W_PH_DESC, ISA_DSPR2;
+def SHRA_QB_MMR2 : DspMMRel, SHRA_QB_MMR2_ENC, SHRA_QB_MMR2_DESC, ISA_DSPR2;
+def SHRA_R_QB_MMR2 : DspMMRel, SHRA_R_QB_MMR2_ENC, SHRA_R_QB_MMR2_DESC,
+ ISA_DSPR2;
+def SHRAV_QB_MMR2 : DspMMRel, SHRAV_QB_MMR2_ENC, SHRAV_QB_MMR2_DESC, ISA_DSPR2;
+def SHRAV_R_QB_MMR2 : DspMMRel, SHRAV_R_QB_MMR2_ENC, SHRAV_R_QB_MMR2_DESC,
+ ISA_DSPR2;
+def SHRL_PH_MMR2 : DspMMRel, SHRL_PH_MMR2_ENC, SHRL_PH_MMR2_DESC, ISA_DSPR2;
+def SHRLV_PH_MMR2 : DspMMRel, SHRLV_PH_MMR2_ENC, SHRLV_PH_MMR2_DESC, ISA_DSPR2;
+def SUBQH_PH_MMR2 : DspMMRel, SUBQH_PH_MMR2_ENC, SUBQH_PH_DESC, ISA_DSPR2;
+def SUBQH_R_PH_MMR2 : DspMMRel, SUBQH_R_PH_MMR2_ENC, SUBQH_R_PH_DESC, ISA_DSPR2;
+def SUBQH_W_MMR2 : DspMMRel, SUBQH_W_MMR2_ENC, SUBQH_W_DESC, ISA_DSPR2;
+def SUBQH_R_W_MMR2 : DspMMRel, SUBQH_R_W_MMR2_ENC, SUBQH_R_W_DESC, ISA_DSPR2;
+def SUBU_PH_MMR2 : DspMMRel, SUBU_PH_MMR2_ENC, SUBU_PH_DESC, ISA_DSPR2;
+def SUBU_S_PH_MMR2 : DspMMRel, SUBU_S_PH_MMR2_ENC, SUBU_S_PH_DESC, ISA_DSPR2;
+def SUBUH_QB_MMR2 : DspMMRel, SUBUH_QB_MMR2_ENC, SUBUH_QB_DESC, ISA_DSPR2;
+def SUBUH_R_QB_MMR2 : DspMMRel, SUBUH_R_QB_MMR2_ENC, SUBUH_R_QB_DESC, ISA_DSPR2;
+def DPS_W_PH_MMR2 : DspMMRel, DPS_W_PH_MMR2_ENC, DPS_W_PH_DESC, ISA_DSPR2;
+def DPSQX_S_W_PH_MMR2 : DspMMRel, DPSQX_S_W_PH_MMR2_ENC, DPSQX_S_W_PH_DESC,
+ ISA_DSPR2;
+def DPSQX_SA_W_PH_MMR2 : DspMMRel, DPSQX_SA_W_PH_MMR2_ENC, DPSQX_SA_W_PH_DESC,
+ ISA_DSPR2;
+def DPSX_W_PH_MMR2 : DspMMRel, DPSX_W_PH_MMR2_ENC, DPSX_W_PH_DESC, ISA_DSPR2;
+def MUL_PH_MMR2 : DspMMRel, MUL_PH_MMR2_ENC, MUL_PH_DESC, ISA_DSPR2;
+def MUL_S_PH_MMR2 : DspMMRel, MUL_S_PH_MMR2_ENC, MUL_S_PH_DESC, ISA_DSPR2;
+def MULQ_RS_W_MMR2 : DspMMRel, MULQ_RS_W_MMR2_ENC, MULQ_RS_W_DESC, ISA_DSPR2;
+def MULQ_S_PH_MMR2 : DspMMRel, MULQ_S_PH_MMR2_ENC, MULQ_S_PH_DESC, ISA_DSPR2;
+def MULQ_S_W_MMR2 : DspMMRel, MULQ_S_W_MMR2_ENC, MULQ_S_W_DESC, ISA_DSPR2;
+def PRECR_QB_PH_MMR2 : DspMMRel, PRECR_QB_PH_MMR2_ENC, PRECR_QB_PH_DESC,
+ ISA_DSPR2;
+def PRECR_SRA_PH_W_MMR2 : DspMMRel, PRECR_SRA_PH_W_MMR2_ENC,
+ PRECR_SRA_PH_W_DESC, ISA_DSPR2;
+def PRECR_SRA_R_PH_W_MMR2 : DspMMRel, PRECR_SRA_R_PH_W_MMR2_ENC,
+ PRECR_SRA_R_PH_W_DESC, ISA_DSPR2;
+def PREPEND_MMR2 : DspMMRel, PREPEND_MMR2_ENC, PREPEND_DESC, ISA_DSPR2;
+
+// Instruction alias.
+def : MMDSPInstAlias<"wrdsp $rt", (WRDSP_MM GPR32Opnd:$rt, 0x1F), 1>;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 004b0d5..756e6c9 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -37,23 +37,14 @@ def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
CEQS_FM_MM<1>;
-def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>,
+def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
-def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>,
+def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>,
BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6;
-
-def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
- ROUND_W_FM_MM<0, 0x6c>;
def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
ROUND_W_FM_MM<0, 0x24>;
-def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
- ROUND_W_FM_MM<0, 0x2c>;
-def ROUND_W_S_MM : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
+def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd,
+ FGR32Opnd, II_ROUND>,
ROUND_W_FM_MM<0, 0xec>;
-def TRUNC_W_S_MM : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
- ROUND_W_FM_MM<0, 0xac>;
-def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S,
- fsqrt>, ROUND_W_FM_MM<0, 0x28>;
def CEIL_W_MM : MMRel, ABSS_FT<"ceil.w.d", FGR32Opnd, AFGR64Opnd, II_CEIL>,
ROUND_W_FM_MM<1, 0x6c>;
@@ -61,7 +52,7 @@ def CVT_W_MM : MMRel, ABSS_FT<"cvt.w.d", FGR32Opnd, AFGR64Opnd, II_CVT>,
ROUND_W_FM_MM<1, 0x24>;
def FLOOR_W_MM : MMRel, ABSS_FT<"floor.w.d", FGR32Opnd, AFGR64Opnd, II_FLOOR>,
ROUND_W_FM_MM<1, 0x2c>;
-def ROUND_W_MM : MMRel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>,
+def ROUND_W_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>,
ROUND_W_FM_MM<1, 0xec>;
def TRUNC_W_MM : MMRel, ABSS_FT<"trunc.w.d", FGR32Opnd, AFGR64Opnd, II_TRUNC>,
ROUND_W_FM_MM<1, 0xac>;
@@ -146,3 +137,14 @@ def NMADD_D32_MM : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
def NMSUB_D32_MM : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
MADDS_FM_MM<0x2a>;
}
+
+let AdditionalPredicates = [InMicroMips] in {
+ def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd,
+ II_FLOOR>, ROUND_W_FM_MM<0, 0x2c>;
+ def TRUNC_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd,
+ FGR32Opnd, II_TRUNC>, ROUND_W_FM_MM<0, 0xac>;
+ def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
+ ROUND_W_FM_MM<0, 0x6c>;
+ def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S,
+ fsqrt>, ROUND_W_FM_MM<0, 0x28>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index 560afa4..b736367 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -389,6 +389,22 @@ class LW_FM_MM<bits<6> op> : MMArch {
let Inst{15-0} = addr{15-0};
}
+class POOL32C_LHUE_FM_MM<bits<6> op, bits<4> fmt, bits<3> funct> : MMArch {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = fmt;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
class LWL_FM_MM<bits<4> funct> {
bits<5> rt;
bits<21> addr;
@@ -402,6 +418,22 @@ class LWL_FM_MM<bits<4> funct> {
let Inst{11-0} = addr{11-0};
}
+class POOL32C_STEVA_LDEVA_FM_MM<bits<4> type, bits<3> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x18;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = type;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
class CMov_F_I_FM_MM<bits<7> func> : MMArch {
bits<5> rd;
bits<5> rs;
@@ -655,6 +687,22 @@ class LL_FM_MM<bits<4> funct> {
let Inst{11-0} = addr{11-0};
}
+class LLE_FM_MM<bits<4> funct> {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x18;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = base;
+ let Inst{15-12} = funct;
+ let Inst{11-9} = 0x6;
+ let Inst{8-0} = offset;
+}
+
class ADDS_FM_MM<bits<2> fmt, bits<8> funct> : MMArch {
bits<5> ft;
bits<5> fs;
@@ -895,7 +943,7 @@ class LWM_FM_MM<bits<4> funct> : MMArch {
let Inst{11-0} = addr{11-0};
}
-class LWM_FM_MM16<bits<4> funct> : MMArch {
+class LWM_FM_MM16<bits<4> funct> : MMArch, PredicateControl {
bits<2> rt;
bits<4> addr;
@@ -922,6 +970,37 @@ class CACHE_PREF_FM_MM<bits<6> op, bits<4> funct> : MMArch {
let Inst{11-0} = offset;
}
+class CACHE_PREFE_FM_MM<bits<6> op, bits<3> funct> : MMArch {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = hint;
+ let Inst{20-16} = base;
+ let Inst{15-12} = 0xA;
+ let Inst{11-9} = funct;
+ let Inst{8-0} = offset;
+}
+
+class POOL32F_PREFX_FM_MM<bits<6> op, bits<9> funct> : MMArch {
+ bits<5> index;
+ bits<5> base;
+ bits<5> hint;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = index;
+ let Inst{20-16} = base;
+ let Inst{15-11} = hint;
+ let Inst{10-9} = 0x0;
+ let Inst{8-0} = funct;
+}
+
class BARRIER_FM_MM<bits<5> op> : MMArch {
bits<32> Inst;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index 3939384..99f0f44 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -13,11 +13,6 @@ def simm12 : Operand<i32> {
let DecoderMethod = "DecodeSimm12";
}
-def uimm5_lsl2 : Operand<OtherVT> {
- let EncoderMethod = "getUImm5Lsl2Encoding";
- let DecoderMethod = "DecodeUImm5lsl2";
-}
-
def uimm6_lsl2 : Operand<i32> {
let EncoderMethod = "getUImm6Lsl2Encoding";
let DecoderMethod = "DecodeUImm6Lsl2";
@@ -30,6 +25,7 @@ def simm9_addiusp : Operand<i32> {
def uimm3_shift : Operand<i32> {
let EncoderMethod = "getUImm3Mod8Encoding";
+ let DecoderMethod = "DecodePOOL16BEncodedField";
}
def simm3_lsa2 : Operand<i32> {
@@ -105,6 +101,14 @@ def mem_mm_gp_imm7_lsl2 : Operand<i32> {
let EncoderMethod = "getMemEncodingMMGPImm7Lsl2";
}
+def mem_mm_9 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32, simm9);
+ let EncoderMethod = "getMemEncodingMMImm9";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
def mem_mm_12 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops GPR32, simm12);
@@ -113,6 +117,14 @@ def mem_mm_12 : Operand<i32> {
let OperandType = "OPERAND_MEMORY";
}
+def mem_mm_16 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops GPR32, simm16);
+ let EncoderMethod = "getMemEncodingMMImm16";
+ let ParserMatchClass = MipsMemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
+}
+
def MipsMemUimm4AsmOperand : AsmOperandClass {
let Name = "MemOffsetUimm4";
let SuperClasses = [MipsMemAsmOperand];
@@ -166,7 +178,7 @@ def simm23_lsl2 : Operand<i32> {
class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> {
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZC, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 0;
@@ -251,6 +263,13 @@ class LLBaseMM<string opstr, RegisterOperand RO> :
let mayLoad = 1;
}
+class LLEBaseMM<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$rt), (ins mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm9";
+ let mayLoad = 1;
+}
+
class SCBaseMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
@@ -259,6 +278,14 @@ class SCBaseMM<string opstr, RegisterOperand RO> :
let Constraints = "$rt = $dst";
}
+class SCEBaseMM<string opstr, RegisterOperand RO> :
+ InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ let DecoderMethod = "DecodeMemMMImm9";
+ let mayStore = 1;
+ let Constraints = "$rt = $dst";
+}
+
class LoadMM<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
InstrItinClass Itin = NoItinerary> :
InstSE<(outs RO:$rt), (ins mem_mm_12:$addr),
@@ -392,7 +419,7 @@ class LoadImmMM16<string opstr, Operand Od, RegisterOperand RO> :
// 16-bit Jump and Link (Call)
class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [(MipsJmpLink RO:$rs)], IIBranch, FrmR> {
+ [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, PredicateControl {
let isCall = 1;
let hasDelaySlot = 1;
let Defs = [RA];
@@ -401,7 +428,7 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
// 16-bit Jump Reg
class JumpRegMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [], IIBranch, FrmR> {
+ [], II_JR, FrmR> {
let hasDelaySlot = 1;
let isBranch = 1;
let isIndirectBranch = 1;
@@ -410,7 +437,7 @@ class JumpRegMM16<string opstr, RegisterOperand RO> :
// Base class for JRADDIUSP instruction.
class JumpRAddiuStackMM16 :
MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jraddiusp\t$imm",
- [], IIBranch, FrmR> {
+ [], II_JRADDIUSP, FrmR> {
let isTerminator = 1;
let isBarrier = 1;
let isBranch = 1;
@@ -420,7 +447,7 @@ class JumpRAddiuStackMM16 :
// 16-bit Jump and Link (Call) - Short Delay Slot
class JumpLinkRegSMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [], IIBranch, FrmR> {
+ [], II_JALRS, FrmR> {
let isCall = 1;
let hasDelaySlot = 1;
let Defs = [RA];
@@ -429,7 +456,7 @@ class JumpLinkRegSMM16<string opstr, RegisterOperand RO> :
// 16-bit Jump Register Compact - No delay slot
class JumpRegCMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
- [], IIBranch, FrmR> {
+ [], II_JRC, FrmR> {
let isTerminator = 1;
let isBarrier = 1;
let isBranch = 1;
@@ -444,7 +471,7 @@ class BrkSdbbp16MM<string opstr> :
class CBranchZeroMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> {
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZ, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -455,18 +482,18 @@ class CBranchZeroMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
let isCall = 1, hasDelaySlot = 1, Defs = [RA] in {
class JumpLinkMM<string opstr, DAGOperand opnd> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [], IIBranch, FrmJ, opstr> {
+ [], II_JALS, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTargetMM";
}
class JumpLinkRegMM<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [], IIBranch, FrmR>;
+ [], II_JALRS, FrmR>;
class BranchCompareToZeroLinkMM<string opstr, DAGOperand opnd,
RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>;
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZALS, FrmI, opstr>;
}
class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
@@ -475,6 +502,10 @@ class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$rd, ${index}(${base})"), [], Itin, FrmFI>;
+class PrefetchIndexed<string opstr> :
+ InstSE<(outs), (ins PtrRC:$base, PtrRC:$index, uimm5:$hint),
+ !strconcat(opstr, "\t$hint, ${index}(${base})"), [], NoItinerary, FrmOther>;
+
class AddImmUPC<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rs), (ins simm23_lsl2:$imm),
!strconcat(opstr, "\t$rs, $imm"), [], NoItinerary, FrmR>;
@@ -543,7 +574,7 @@ class LoadMultMM16<string opstr,
class UncondBranchMM16<string opstr> :
MicroMipsInst16<(outs), (ins brtarget10_mm:$offset),
!strconcat(opstr, "\t$offset"),
- [], IIBranch, FrmI> {
+ [], II_B, FrmI> {
let isBranch = 1;
let isTerminator = 1;
let isBarrier = 1;
@@ -553,21 +584,24 @@ class UncondBranchMM16<string opstr> :
}
def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
- ARITH_FM_MM16<0>;
-def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
- ARITH_FM_MM16<1>;
-def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>;
+ ARITH_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6;
def AND16_MM : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>,
- LOGIC_FM_MM16<0x2>;
-def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>,
- LOGIC_FM_MM16<0x3>;
-def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
- LOGIC_FM_MM16<0x1>;
-def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>;
+ LOGIC_FM_MM16<0x2>, ISA_MICROMIPS_NOT_32R6_64R6;
+def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, LOGIC_FM_MM16<0x3>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
def SLL16_MM : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
- SHIFT_FM_MM16<0>;
+ SHIFT_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6;
def SRL16_MM : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
- SHIFT_FM_MM16<1>;
+ SHIFT_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6;
+
+def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
+ ARITH_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6;
+def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
+ LOGIC_FM_MM16<0x1>, ISA_MICROMIPS_NOT_32R6_64R6;
def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU,
mem_mm_4>, LOAD_STORE_FM_MM16<0x02>;
def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU,
@@ -597,7 +631,8 @@ def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>;
def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16;
def LI16_MM : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, LI_FM_MM16,
IsAsCheapAsAMove;
-def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>;
+def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
def JALRS16_MM : JumpLinkRegSMM16<"jalrs16", GPR32Opnd>, JALR_FM_MM16<0x0f>;
def JRC16_MM : JumpRegCMM16<"jrc", GPR32Opnd>, JALR_FM_MM16<0x0d>;
def JRADDIUSP : JumpRAddiuStackMM16, JRADDIUSP_FM_MM16<0x18>;
@@ -607,8 +642,18 @@ def BEQZ16_MM : CBranchZeroMM<"beqz16", brtarget7_mm, GPRMM16Opnd>,
def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>,
BEQNEZ_FM_MM16<0x2b>;
def B16_MM : UncondBranchMM16<"b16">, B16_FM;
-def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>;
-def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>;
+def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>,
+ ISA_MICROMIPS_NOT_32R6_64R6;
+
+let DecoderNamespace = "MicroMips" in {
+ /// Load and Store Instructions - multiple
+ def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+ def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+}
class WaitMM<string opstr> :
InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
@@ -701,6 +746,18 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
}
+ let DecoderMethod = "DecodeMemMMImm9" in {
+ def LBE_MM : Load<"lbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>;
+ def LBuE_MM : Load<"lbue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x0>;
+ def LHE_MM : Load<"lhe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x5>;
+ def LHuE_MM : Load<"lhue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x1>;
+ def LWE_MM : Load<"lwe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x7>;
+ def SBE_MM : Store<"sbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x4>;
+ def SHE_MM : Store<"she", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x5>;
+ def SWE_MM : StoreMemory<"swe", GPR32Opnd, mem_simm9gpr>,
+ POOL32C_LHUE_FM_MM<0x18, 0xa, 0x7>;
+ }
+
def LWXS_MM : LoadWordIndexedScaledMM<"lwxs", GPR32Opnd>, LWXS_FM_MM<0x118>;
def LWU_MM : LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU>, LL_FM_MM<0xe>;
@@ -714,12 +771,20 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
LWL_FM_MM<0x8>;
def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>,
LWL_FM_MM<0x9>;
+ let DecoderMethod = "DecodeMemMMImm9" in {
+ def LWLE_MM : LoadLeftRightMM<"lwle", MipsLWL, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x2>;
+ def LWRE_MM : LoadLeftRightMM<"lwre", MipsLWR, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x3>;
+ def SWLE_MM : StoreLeftRightMM<"swle", MipsSWL, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x0>;
+ def SWRE_MM : StoreLeftRightMM<"swre", MipsSWR, GPR32Opnd, mem_mm_12>,
+ POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x1>, ISA_MIPS1_NOT_32R6_64R6;
+ }
/// Load and Store Instructions - multiple
def SWM32_MM : StoreMultMM<"swm32">, LWM_FM_MM<0xd>;
def LWM32_MM : LoadMultMM<"lwm32">, LWM_FM_MM<0x5>;
- def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>;
- def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>;
/// Load and Store Pair Instructions
def SWP_MM : StorePairMM<"swp">, LWM_FM_MM<0x9>;
@@ -777,11 +842,11 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
SEB_FM_MM<0x0ec>, ISA_MIPS32R2;
/// Word Swap Bytes Within Halfwords
- def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>,
- ISA_MIPS32R2;
-
- def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>,
- EXT_FM_MM<0x2c>;
+ def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>,
+ SEB_FM_MM<0x1ec>, ISA_MIPS32R2;
+ // TODO: Add '0 < pos+size <= 32' constraint check to ext instruction
+ def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1,
+ MipsExt>, EXT_FM_MM<0x2c>;
def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>,
EXT_FM_MM<0x0c>;
@@ -854,12 +919,22 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>;
def SC_MM : SCBaseMM<"sc", GPR32Opnd>, LL_FM_MM<0xb>;
+ def LLE_MM : LLEBaseMM<"lle", GPR32Opnd>, LLE_FM_MM<0x6>;
+ def SCE_MM : SCEBaseMM<"sce", GPR32Opnd>, LLE_FM_MM<0xA>;
+
let DecoderMethod = "DecodeCacheOpMM" in {
def CACHE_MM : MMRel, CacheOp<"cache", mem_mm_12>,
CACHE_PREF_FM_MM<0x08, 0x6>;
def PREF_MM : MMRel, CacheOp<"pref", mem_mm_12>,
CACHE_PREF_FM_MM<0x18, 0x2>;
}
+
+ let DecoderMethod = "DecodePrefeOpMM" in {
+ def PREFE_MM : MMRel, CacheOp<"prefe", mem_mm_9>,
+ CACHE_PREFE_FM_MM<0x18, 0x2>;
+ def CACHEE_MM : MMRel, CacheOp<"cachee", mem_mm_9>,
+ CACHE_PREFE_FM_MM<0x18, 0x3>;
+ }
def SSNOP_MM : MMRel, Barrier<"ssnop">, BARRIER_FM_MM<0x1>;
def EHB_MM : MMRel, Barrier<"ehb">, BARRIER_FM_MM<0x3>;
def PAUSE_MM : MMRel, Barrier<"pause">, BARRIER_FM_MM<0x5>;
@@ -870,7 +945,13 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def TLBWR_MM : MMRel, TLB<"tlbwr">, COP0_TLB_FM_MM<0xcd>;
def SDBBP_MM : MMRel, SYS_FT<"sdbbp">, SDBBP_FM_MM;
- def RDHWR_MM : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM_MM;
+
+ def PREFX_MM : PrefetchIndexed<"prefx">, POOL32F_PREFX_FM_MM<0x15, 0x1A0>;
+}
+
+let DecoderNamespace = "MicroMips" in {
+ def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>,
+ RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6;
}
let Predicates = [InMicroMips] in {
@@ -928,7 +1009,7 @@ class UncondBranchMMPseudo<string opstr> :
MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset),
!strconcat(opstr, "\t$offset")>;
- def B_MM_Pseudo : UncondBranchMMPseudo<"b">;
+def B_MM_Pseudo : UncondBranchMMPseudo<"b">, ISA_MICROMIPS;
def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>;
def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>;
@@ -937,4 +1018,17 @@ class UncondBranchMMPseudo<string opstr> :
let Predicates = [InMicroMips] in {
def : MipsInstAlias<"ei", (EI_MM ZERO), 1>, ISA_MIPS32R2;
+def : MipsInstAlias<"di", (DI_MM ZERO), 1>, ISA_MIPS32R2;
+def : MipsInstAlias<"teq $rs, $rt",
+ (TEQ_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tge $rs, $rt",
+ (TGE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tgeu $rs, $rt",
+ (TGEU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tlt $rs, $rt",
+ (TLT_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tltu $rs, $rt",
+ (TLTU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tne $rs, $rt",
+ (TNE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
index dbb5f7b..35352b6 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -154,9 +154,14 @@ def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">;
def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true",
"Mips DSP-R2 ASE", [FeatureDSP]>;
+def FeatureDSPR3
+ : SubtargetFeature<"dspr3", "HasDSPR3", "true", "Mips DSP-R3 ASE",
+ [ FeatureDSP, FeatureDSPR2 ]>;
def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">;
+def FeatureEVA : SubtargetFeature<"eva", "HasEVA", "true", "Mips EVA ASE">;
+
def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true",
"microMips mode">;
@@ -164,10 +169,19 @@ def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips",
"true", "Octeon cnMIPS Support",
[FeatureMips64r2]>;
+def FeatureUseTCCInDIV : SubtargetFeature<
+ "use-tcc-in-div",
+ "UseTCCInDIV", "false",
+ "Force the assembler to use trapping">;
+
//===----------------------------------------------------------------------===//
// Mips processors supported.
//===----------------------------------------------------------------------===//
+def ImplP5600 : SubtargetFeature<"p5600", "ProcImpl",
+ "MipsSubtarget::CPU::P5600",
+ "The P5600 Processor", [FeatureMips32r5]>;
+
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
@@ -187,12 +201,11 @@ def : Proc<"mips64r2", [FeatureMips64r2]>;
def : Proc<"mips64r3", [FeatureMips64r3]>;
def : Proc<"mips64r5", [FeatureMips64r5]>;
def : Proc<"mips64r6", [FeatureMips64r6]>;
-def : Proc<"mips16", [FeatureMips16]>;
def : Proc<"octeon", [FeatureMips64r2, FeatureCnMips]>;
+def : ProcessorModel<"p5600", MipsP5600Model, [ImplP5600]>;
def MipsAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
- let MnemonicContainsDot = 1;
}
def MipsAsmParserVariant : AsmParserVariant {
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index 46cc99c..26426c0 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -39,7 +39,11 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
const Mips16InstrInfo &TII =
*static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+
uint64_t StackSize = MFI->getStackSize();
// No need to allocate space on the stack.
@@ -107,7 +111,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- MachineBasicBlock *EntryBlock = MF->begin();
+ MachineBasicBlock *EntryBlock = &MF->front();
//
// Registers RA, S0,S1 are the callee saved registers and they
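For reference, a minimal sketch (not part of the patch; the opcode parameter is a placeholder) of the convention adopted above for Mips16 prologue emission: build the instruction with an explicitly-unknown DebugLoc instead of borrowing the location of the first instruction in the block, since that first real location is what marks the end of the prologue.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

// Emit one prologue instruction with an unknown debug location, so the
// first "real" location in the block still marks the prologue end.
static void emitPrologueInst(MachineBasicBlock &MBB,
                             const TargetInstrInfo &TII, unsigned Opcode) {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL; // intentionally unknown, never copied from *MBBI
  BuildMI(MBB, MBBI, DL, TII.get(Opcode));
}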
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index 893fc7c..b2bc7e7 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -40,26 +40,17 @@ namespace {
const MipsTargetMachine &TM;
};
- class InlineAsmHelper {
- LLVMContext &C;
- BasicBlock *BB;
- public:
- InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) :
- C(C_), BB(BB_) {
- }
-
- void Out(StringRef AsmString) {
- std::vector<llvm::Type *> AsmArgTypes;
- std::vector<llvm::Value*> AsmArgs;
-
- llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C),
- AsmArgTypes, false);
- llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true,
- /* IsAlignStack */ false,
- llvm::InlineAsm::AD_ATT);
- CallInst::Create(IA, AsmArgs, "", BB);
- }
- };
+ static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) {
+ std::vector<llvm::Type *> AsmArgTypes;
+ std::vector<llvm::Value *> AsmArgs;
+
+ llvm::FunctionType *AsmFTy =
+ llvm::FunctionType::get(Type::getVoidTy(C), AsmArgTypes, false);
+ llvm::InlineAsm *IA =
+ llvm::InlineAsm::get(AsmFTy, AsmText, "", true,
+ /* IsAlignStack */ false, llvm::InlineAsm::AD_ATT);
+ CallInst::Create(IA, AsmArgs, "", BB);
+ }
char Mips16HardFloat::ID = 0;
}
@@ -182,7 +173,7 @@ static bool needsFPReturnHelper(Function &F) {
return whichFPReturnVariant(RetType) != NoFPRet;
}
-static bool needsFPReturnHelper(const FunctionType &FT) {
+static bool needsFPReturnHelper(FunctionType &FT) {
Type* RetType = FT.getReturnType();
return whichFPReturnVariant(RetType) != NoFPRet;
}
@@ -195,63 +186,72 @@ static bool needsFPHelperFromSig(Function &F) {
// We swap between FP and Integer registers to allow Mips16 and Mips32 to
// interoperate
//
-static void swapFPIntParams(FPParamVariant PV, Module *M, InlineAsmHelper &IAH,
- bool LE, bool ToFP) {
- //LLVMContext &Context = M->getContext();
- std::string MI = ToFP? "mtc1 ": "mfc1 ";
+static std::string swapFPIntParams(FPParamVariant PV, Module *M, bool LE,
+ bool ToFP) {
+ std::string MI = ToFP ? "mtc1 ": "mfc1 ";
+ std::string AsmText;
+
switch (PV) {
case FSig:
- IAH.Out(MI + "$$4,$$f12");
+ AsmText += MI + "$$4, $$f12\n";
break;
+
case FFSig:
- IAH.Out(MI +"$$4,$$f12");
- IAH.Out(MI + "$$5,$$f14");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f14\n";
break;
+
case FDSig:
- IAH.Out(MI + "$$4,$$f12");
+ AsmText += MI + "$$4, $$f12\n";
if (LE) {
- IAH.Out(MI + "$$6,$$f14");
- IAH.Out(MI + "$$7,$$f15");
+ AsmText += MI + "$$6, $$f14\n";
+ AsmText += MI + "$$7, $$f15\n";
} else {
- IAH.Out(MI + "$$7,$$f14");
- IAH.Out(MI + "$$6,$$f15");
+ AsmText += MI + "$$7, $$f14\n";
+ AsmText += MI + "$$6, $$f15\n";
}
break;
+
case DSig:
if (LE) {
- IAH.Out(MI + "$$4,$$f12");
- IAH.Out(MI + "$$5,$$f13");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f13\n";
} else {
- IAH.Out(MI + "$$5,$$f12");
- IAH.Out(MI + "$$4,$$f13");
+ AsmText += MI + "$$5, $$f12\n";
+ AsmText += MI + "$$4, $$f13\n";
}
break;
+
case DDSig:
if (LE) {
- IAH.Out(MI + "$$4,$$f12");
- IAH.Out(MI + "$$5,$$f13");
- IAH.Out(MI + "$$6,$$f14");
- IAH.Out(MI + "$$7,$$f15");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f13\n";
+ AsmText += MI + "$$6, $$f14\n";
+ AsmText += MI + "$$7, $$f15\n";
} else {
- IAH.Out(MI + "$$5,$$f12");
- IAH.Out(MI + "$$4,$$f13");
- IAH.Out(MI + "$$7,$$f14");
- IAH.Out(MI + "$$6,$$f15");
+ AsmText += MI + "$$5, $$f12\n";
+ AsmText += MI + "$$4, $$f13\n";
+ AsmText += MI + "$$7, $$f14\n";
+ AsmText += MI + "$$6, $$f15\n";
}
break;
+
case DFSig:
if (LE) {
- IAH.Out(MI + "$$4,$$f12");
- IAH.Out(MI + "$$5,$$f13");
+ AsmText += MI + "$$4, $$f12\n";
+ AsmText += MI + "$$5, $$f13\n";
} else {
- IAH.Out(MI + "$$5,$$f12");
- IAH.Out(MI + "$$4,$$f13");
+ AsmText += MI + "$$5, $$f12\n";
+ AsmText += MI + "$$4, $$f13\n";
}
- IAH.Out(MI + "$$6,$$f14");
+ AsmText += MI + "$$6, $$f14\n";
break;
+
case NoSig:
- return;
+ break;
}
+
+ return AsmText;
}
//
@@ -282,68 +282,77 @@ static void assureFPCallStub(Function &F, Module *M,
FStub->addFnAttr("nomips16");
FStub->setSection(SectionName);
BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub);
- InlineAsmHelper IAH(Context, BB);
- IAH.Out(".set reorder");
FPReturnVariant RV = whichFPReturnVariant(FStub->getReturnType());
FPParamVariant PV = whichFPParamVariantNeeded(F);
- swapFPIntParams(PV, M, IAH, LE, true);
+
+ std::string AsmText;
+ AsmText += ".set reorder\n";
+ AsmText += swapFPIntParams(PV, M, LE, true);
if (RV != NoFPRet) {
- IAH.Out("move $$18, $$31");
- IAH.Out("jal " + Name);
+ AsmText += "move $$18, $$31\n";
+ AsmText += "jal " + Name + "\n";
} else {
- IAH.Out("lui $$25,%hi(" + Name + ")");
- IAH.Out("addiu $$25,$$25,%lo(" + Name + ")" );
+ AsmText += "lui $$25, %hi(" + Name + ")\n";
+ AsmText += "addiu $$25, $$25, %lo(" + Name + ")\n";
}
+
switch (RV) {
case FRet:
- IAH.Out("mfc1 $$2,$$f0");
+ AsmText += "mfc1 $$2, $$f0\n";
break;
+
case DRet:
if (LE) {
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f1");
+ AsmText += "mfc1 $$2, $$f0\n";
+ AsmText += "mfc1 $$3, $$f1\n";
} else {
- IAH.Out("mfc1 $$3,$$f0");
- IAH.Out("mfc1 $$2,$$f1");
+ AsmText += "mfc1 $$3, $$f0\n";
+ AsmText += "mfc1 $$2, $$f1\n";
}
break;
+
case CFRet:
if (LE) {
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f2");
+ AsmText += "mfc1 $$2, $$f0\n";
+ AsmText += "mfc1 $$3, $$f2\n";
} else {
- IAH.Out("mfc1 $$3,$$f0");
- IAH.Out("mfc1 $$3,$$f2");
+ AsmText += "mfc1 $$3, $$f0\n";
+ AsmText += "mfc1 $$3, $$f2\n";
}
break;
+
case CDRet:
if (LE) {
- IAH.Out("mfc1 $$4,$$f2");
- IAH.Out("mfc1 $$5,$$f3");
- IAH.Out("mfc1 $$2,$$f0");
- IAH.Out("mfc1 $$3,$$f1");
+ AsmText += "mfc1 $$4, $$f2\n";
+ AsmText += "mfc1 $$5, $$f3\n";
+ AsmText += "mfc1 $$2, $$f0\n";
+ AsmText += "mfc1 $$3, $$f1\n";
} else {
- IAH.Out("mfc1 $$5,$$f2");
- IAH.Out("mfc1 $$4,$$f3");
- IAH.Out("mfc1 $$3,$$f0");
- IAH.Out("mfc1 $$2,$$f1");
+ AsmText += "mfc1 $$5, $$f2\n";
+ AsmText += "mfc1 $$4, $$f3\n";
+ AsmText += "mfc1 $$3, $$f0\n";
+ AsmText += "mfc1 $$2, $$f1\n";
}
break;
+
case NoFPRet:
break;
}
+
if (RV != NoFPRet)
- IAH.Out("jr $$18");
+ AsmText += "jr $$18\n";
else
- IAH.Out("jr $$25");
+ AsmText += "jr $$25\n";
+ EmitInlineAsm(Context, BB, AsmText);
+
new UnreachableInst(Context, BB);
}
//
// Functions that are llvm intrinsics and don't need helpers.
//
-static const char *IntrinsicInline[] = {
+static const char *const IntrinsicInline[] = {
"fabs", "fabsf",
"llvm.ceil.f32", "llvm.ceil.f64",
"llvm.copysign.f32", "llvm.copysign.f64",
@@ -395,7 +404,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
Type *T = RVal->getType();
FPReturnVariant RV = whichFPReturnVariant(T);
if (RV == NoFPRet) continue;
- static const char* Helper[NoFPRet] = {
+ static const char *const Helper[NoFPRet] = {
"__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
"__mips16_ret_dc"
};
@@ -419,11 +428,11 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
const Value* V = CI->getCalledValue();
- const Type* T = nullptr;
+ Type* T = nullptr;
if (V) T = V->getType();
- const PointerType *PFT=nullptr;
+ PointerType *PFT = nullptr;
if (T) PFT = dyn_cast<PointerType>(T);
- const FunctionType *FT=nullptr;
+ FunctionType *FT = nullptr;
if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
Function *F_ = CI->getCalledFunction();
if (FT && needsFPReturnHelper(*FT) &&
@@ -469,20 +478,21 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
FStub->addFnAttr("nomips16");
FStub->setSection(SectionName);
BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub);
- InlineAsmHelper IAH(Context, BB);
+
+ std::string AsmText;
if (PicMode) {
- IAH.Out(".set noreorder");
- IAH.Out(".cpload $$25");
- IAH.Out(".set reorder");
- IAH.Out(".reloc 0,R_MIPS_NONE," + Name);
- IAH.Out("la $$25," + LocalName);
- }
- else {
- IAH.Out("la $$25," + Name);
- }
- swapFPIntParams(PV, M, IAH, LE, false);
- IAH.Out("jr $$25");
- IAH.Out(LocalName + " = " + Name);
+ AsmText += ".set noreorder\n";
+ AsmText += ".cpload $$25\n";
+ AsmText += ".set reorder\n";
+ AsmText += ".reloc 0, R_MIPS_NONE, " + Name + "\n";
+ AsmText += "la $$25, " + LocalName + "\n";
+ } else
+ AsmText += "la $$25, " + Name + "\n";
+ AsmText += swapFPIntParams(PV, M, LE, false);
+ AsmText += "jr $$25\n";
+ AsmText += LocalName + " = " + Name + "\n";
+ EmitInlineAsm(Context, BB, AsmText);
+
new UnreachableInst(FStub->getContext(), BB);
}
@@ -535,7 +545,7 @@ bool Mips16HardFloat::runOnModule(Module &M) {
FPParamVariant V = whichFPParamVariantNeeded(*F);
if (V != NoSig) {
Modified = true;
- createFPFnStub(F, &M, V, TM);
+ createFPFnStub(&*F, &M, V, TM);
}
}
return Modified;
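For reference, a small standalone sketch (not part of the patch; the stub name and assembly lines are placeholders) of the emission pattern the rewritten Mips16HardFloat stubs use above: accumulate the whole stub body into one string, then issue a single side-effecting inline-asm call followed by an unreachable terminator.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>

using namespace llvm;

int main() {
  LLVMContext C;
  Module M("stub-example", C);

  // A void() stub function with a single entry block, like the FP call stubs.
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);
  Function *F =
      Function::Create(FTy, Function::ExternalLinkage, "example_stub", &M);
  BasicBlock *BB = BasicBlock::Create(C, "entry", F);

  // Build the whole body as one string, as EmitInlineAsm's callers do.
  std::string AsmText;
  AsmText += "mfc1 $$2, $$f0\n";
  AsmText += "jr $$18\n";

  // Void inline asm, no operands, has side effects, AT&T dialect.
  InlineAsm *IA = InlineAsm::get(FTy, AsmText, /*Constraints=*/"",
                                 /*hasSideEffects=*/true,
                                 /*isAlignStack=*/false, InlineAsm::AD_ATT);
  std::vector<Value *> NoArgs;
  CallInst::Create(IA, NoArgs, "", BB);
  new UnreachableInst(C, BB);

  M.print(outs(), nullptr);
  return 0;
}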
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index bce2c1e..5a1c2c67 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -73,7 +73,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 3522cbb..e748325 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -530,8 +530,7 @@ emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const {
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -592,8 +591,7 @@ Mips16TargetLowering::emitSelT16(unsigned Opc1, unsigned Opc2, MachineInstr *MI,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -657,8 +655,7 @@ Mips16TargetLowering::emitSeliT16(unsigned Opc1, unsigned Opc2,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
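For reference, a minimal sketch (not part of the patch) of the iterator idiom adopted above: an ilist node such as MachineBasicBlock now yields its own iterator via getIterator(), so the position after a block is obtained explicitly rather than through an implicit pointer-to-iterator conversion.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <iterator>

using namespace llvm;

// Return the function-level iterator just past BB, matching the
// "++BB->getIterator()" spelling used in the emitSel* helpers.
static MachineFunction::iterator blockAfter(MachineBasicBlock *BB) {
  return std::next(BB->getIterator());
}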
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index a49572e..da8ada4 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -196,7 +196,7 @@ static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
const BitVector Reserved = RI.getReservedRegs(MF);
@@ -263,7 +263,7 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned Reg1, unsigned Reg2) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
//
// li reg1, constant
// move reg2, sp
@@ -446,7 +446,7 @@ const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const {
void Mips16InstrInfo::BuildAddiuSpImm
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm);
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
index 10fff03..dad6ea4 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td
@@ -530,19 +530,19 @@ class MayStore {
// Purpose: Add Immediate Unsigned Word (2-Operand, Extended)
// To add a constant to a 32-bit integer.
//
-def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>;
+def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIM16Alu>;
-def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>,
+def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIM16Alu>,
ArithLogic16Defs<0> {
let AddedComplexity = 5;
}
-def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>,
+def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIM16Alu>,
ArithLogic16Defs<0> {
let isCodeGenOnly = 1;
}
def AddiuRxRyOffMemX16:
- FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>;
+ FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIM16Alu>;
//
@@ -550,7 +550,7 @@ def AddiuRxRyOffMemX16:
// Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended)
// To add a constant to the program counter.
//
-def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
+def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIM16Alu>;
//
// Format: ADDIU sp, immediate MIPS16e
@@ -558,14 +558,14 @@ def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>;
// To add a constant to the stack pointer.
//
def AddiuSpImm16
- : FI816_SP_ins<0b011, "addiu", IIAlu> {
+ : FI816_SP_ins<0b011, "addiu", IIM16Alu> {
let Defs = [SP];
let Uses = [SP];
let AddedComplexity = 5;
}
def AddiuSpImmX16
- : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> {
+ : FEXT_I816_SP_ins<0b011, "addiu", IIM16Alu> {
let Defs = [SP];
let Uses = [SP];
}
@@ -576,14 +576,14 @@ def AddiuSpImmX16
// To add 32-bit integers.
//
-def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
+def AdduRxRyRz16: FRRR16_ins<01, "addu", IIM16Alu>, ArithLogic16Defs<1>;
//
// Format: AND rx, ry MIPS16e
// Purpose: AND
// To do a bitwise logical AND.
-def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIM16Alu>, ArithLogic16Defs<1>;
//
@@ -591,7 +591,7 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
// Purpose: Branch on Equal to Zero
// To test a GPR then do a PC-relative conditional branch.
//
-def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16;
//
@@ -599,7 +599,7 @@ def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
// Purpose: Branch on Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
-def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16;
//
// Format: B offset MIPS16e
@@ -607,27 +607,27 @@ def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
// To do an unconditional PC-relative branch.
//
-def Bimm16: FI16_ins<0b00010, "b", IIAlu>, branch16;
+def Bimm16: FI16_ins<0b00010, "b", IIM16Alu>, branch16;
// Format: B offset MIPS16e
// Purpose: Unconditional Branch
// To do an unconditional PC-relative branch.
//
-def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
+def BimmX16: FEXT_I16_ins<0b00010, "b", IIM16Alu>, branch16;
//
// Format: BNEZ rx, offset MIPS16e
// Purpose: Branch on Not Equal to Zero
// To test a GPR then do a PC-relative conditional branch.
//
-def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16;
//
// Format: BNEZ rx, offset MIPS16e
// Purpose: Branch on Not Equal to Zero (Extended)
// To test a GPR then do a PC-relative conditional branch.
//
-def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16;
//
@@ -641,11 +641,11 @@ def Break16: FRRBreakNull16_ins<"break 0", NoItinerary>;
// Purpose: Branch on T Equal to Zero (Extended)
// To test special register T then do a PC-relative conditional branch.
//
-def Bteqz16: FI816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+def Bteqz16: FI816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 {
let Uses = [T8];
}
-def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 {
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 {
let Uses = [T8];
}
@@ -669,11 +669,11 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">,
// To test special register T then do a PC-relative conditional branch.
//
-def Btnez16: FI816_ins<0b001, "btnez", IIAlu>, cbranch16 {
+def Btnez16: FI816_ins<0b001, "btnez", IIM16Alu>, cbranch16 {
let Uses = [T8];
}
-def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 {
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIM16Alu> ,cbranch16 {
let Uses = [T8];
}
@@ -695,7 +695,7 @@ def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">,
// Purpose: Compare
// To compare the contents of two GPRs.
//
-def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
+def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIM16Alu> {
let Defs = [T8];
}
@@ -704,7 +704,7 @@ def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> {
// Purpose: Compare Immediate
// To compare a constant with the contents of a GPR.
//
-def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
+def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIM16Alu> {
let Defs = [T8];
}
@@ -713,7 +713,7 @@ def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> {
// Purpose: Compare Immediate (Extended)
// To compare a constant with the contents of a GPR.
//
-def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
+def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIM16Alu> {
let Defs = [T8];
}
@@ -723,7 +723,7 @@ def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> {
// Purpose: Divide Word
// To divide 32-bit signed integers.
//
-def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
+def DivRxRy16: FRR16_div_ins<0b11010, "div", IIM16Alu> {
let Defs = [HI0, LO0];
}
@@ -732,7 +732,7 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
// Purpose: Divide Unsigned Word
// To divide 32-bit unsigned integers.
//
-def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
+def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIM16Alu> {
let Defs = [HI0, LO0];
}
//
@@ -742,13 +742,13 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
// region and preserve the current ISA.
//
-def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> {
+def Jal16 : FJAL16_ins<0b0, "jal", IIM16Alu> {
let hasDelaySlot = 0; // not true, but we add the nop for now
let isCall=1;
let Defs = [RA];
}
-def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 {
+def JalB16 : FJALB16_ins<0b0, "jal", IIM16Alu>, branch16 {
let hasDelaySlot = 0; // not true, but we add the nop for now
let isBranch=1;
let Defs = [RA];
@@ -761,7 +761,7 @@ def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 {
// address register.
//
-def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
+def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIM16Alu> {
let isBranch = 1;
let isIndirectBranch = 1;
let hasDelaySlot = 1;
@@ -769,14 +769,14 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> {
let isBarrier=1;
}
-def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> {
+def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> {
let isBranch = 1;
let isIndirectBranch = 1;
let isTerminator=1;
let isBarrier=1;
}
-def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> {
+def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIM16Alu> {
let isBranch = 1;
let isIndirectBranch = 1;
let isTerminator=1;
@@ -825,16 +825,16 @@ def LhuRxRyOffMemX16:
// Purpose: Load Immediate
// To load a constant into a GPR.
//
-def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>;
+def LiRxImm16: FRI16_ins<0b01101, "li", IIM16Alu>;
//
// Format: LI rx, immediate MIPS16e
// Purpose: Load Immediate (Extended)
// To load a constant into a GPR.
//
-def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>;
+def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIM16Alu>;
-def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIAlu> {
+def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIM16Alu> {
let isCodeGenOnly = 1;
}
@@ -863,21 +863,21 @@ def LwRxPcTcpX16: FEXT_RI16_TCP_ins<0b10110, "lw", II_LW>, MayLoad;
// Purpose: Move
// To move the contents of a GPR to a GPR.
//
-def Move32R16: FI8_MOV32R16_ins<"move", IIAlu>;
+def Move32R16: FI8_MOV32R16_ins<"move", IIM16Alu>;
//
// Format: MOVE ry, r32 MIPS16e
//Purpose: Move
// To move the contents of a GPR to a GPR.
//
-def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>;
+def MoveR3216: FI8_MOVR3216_ins<"move", IIM16Alu>;
//
// Format: MFHI rx MIPS16e
// Purpose: Move From HI Register
// To copy the special purpose HI register to a GPR.
//
-def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
+def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIM16Alu> {
let Uses = [HI0];
let hasSideEffects = 0;
}
@@ -887,7 +887,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> {
// Purpose: Move From LO Register
// To copy the special purpose LO register to a GPR.
//
-def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
+def Mflo16: FRR16_M_ins<0b10010, "mflo", IIM16Alu> {
let Uses = [LO0];
let hasSideEffects = 0;
}
@@ -895,13 +895,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> {
//
// Pseudo Instruction for mult
//
-def MultRxRy16: FMULT16_ins<"mult", IIAlu> {
+def MultRxRy16: FMULT16_ins<"mult", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
-def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
+def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -912,7 +912,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> {
// Purpose: Multiply Word
// To multiply 32-bit signed integers.
//
-def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
+def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -923,7 +923,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> {
// Purpose: Multiply Unsigned Word
// To multiply 32-bit unsigned integers.
//
-def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
+def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIM16Alu> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -934,21 +934,21 @@ def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> {
// Purpose: Negate
// To negate an integer value.
//
-def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIAlu>;
+def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIM16Alu>;
//
// Format: NOT rx, ry MIPS16e
// Purpose: Not
// To complement an integer value
//
-def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIAlu>;
+def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIM16Alu>;
//
// Format: OR rx, ry MIPS16e
// Purpose: Or
// To do a bitwise logical OR.
//
-def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>;
+def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIM16Alu>, ArithLogic16Defs<1>;
//
// Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize}
@@ -1012,7 +1012,7 @@ def SbRxRyOffMemX16:
// Sign-extend least significant byte in register rx.
//
def SebRx16
- : FRR_SF16_ins<0b10001, 0b100, "seb", IIAlu>;
+ : FRR_SF16_ins<0b10001, 0b100, "seb", IIM16Alu>;
//
// Format: SEH rx MIPS16e
@@ -1020,7 +1020,7 @@ def SebRx16
// Sign-extend least significant word in register rx.
//
def SehRx16
- : FRR_SF16_ins<0b10001, 0b101, "seh", IIAlu>;
+ : FRR_SF16_ins<0b10001, 0b101, "seh", IIM16Alu>;
//
// The Sel(T) instructions are pseudos
@@ -1149,21 +1149,21 @@ def ShRxRyOffMemX16:
// Purpose: Shift Word Left Logical (Extended)
// To execute a left-shift of a word by a fixed number of bits-0 to 31 bits.
//
-def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>;
+def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIM16Alu>;
//
// Format: SLLV ry, rx MIPS16e
// Purpose: Shift Word Left Logical Variable
// To execute a left-shift of a word by a variable number of bits.
//
-def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>;
+def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIM16Alu>;
// Format: SLTI rx, immediate MIPS16e
// Purpose: Set on Less Than Immediate
// To record the result of a less-than comparison with a constant.
//
//
-def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
+def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIM16Alu> {
let Defs = [T8];
}
@@ -1173,7 +1173,7 @@ def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> {
// To record the result of a less-than comparison with a constant.
//
//
-def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> {
+def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIM16Alu> {
let Defs = [T8];
}
@@ -1184,7 +1184,7 @@ def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">;
// To record the result of a less-than comparison with a constant.
//
//
-def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
+def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIM16Alu> {
let Defs = [T8];
}
@@ -1194,7 +1194,7 @@ def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> {
// To record the result of a less-than comparison with a constant.
//
//
-def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> {
+def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIM16Alu> {
let Defs = [T8];
}
//
@@ -1209,7 +1209,7 @@ def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">;
// Purpose: Set on Less Than
// To record the result of a less-than comparison.
//
-def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{
+def SltRxRy16: FRR16R_ins<0b00010, "slt", IIM16Alu>{
let Defs = [T8];
}
@@ -1219,7 +1219,7 @@ def SltCCRxRy16: FCCRR16_ins<"slt">;
// Purpose: Set on Less Than Unsigned
// To record the result of an unsigned less-than comparison.
//
-def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{
+def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIM16Alu>{
let Defs = [T8];
}
@@ -1236,7 +1236,7 @@ def SltuCCRxRy16: FCCRR16_ins<"sltu">;
// To execute an arithmetic right-shift of a word by a variable
// number of bits.
//
-def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
+def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIM16Alu>;
//
@@ -1245,7 +1245,7 @@ def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>;
// To execute an arithmetic right-shift of a word by a fixed
// number of bits-1 to 8 bits.
//
-def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
+def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIM16Alu>;
//
@@ -1254,7 +1254,7 @@ def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>;
// To execute a logical right-shift of a word by a variable
// number of bits.
//
-def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
+def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIM16Alu>;
//
@@ -1263,14 +1263,14 @@ def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>;
// To execute a logical right-shift of a word by a fixed
// number of bits-1 to 31 bits.
//
-def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>;
+def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIM16Alu>;
//
// Format: SUBU rz, rx, ry MIPS16e
// Purpose: Subtract Unsigned Word
// To subtract 32-bit integers
//
-def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>;
+def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIM16Alu>, ArithLogic16Defs<0>;
//
// Format: SW ry, offset(rx) MIPS16e
@@ -1294,7 +1294,7 @@ def SwRxSpImmX16: FEXT_RI16_SP_Store_explicit_ins
// Purpose: Xor
// To do a bitwise logical XOR.
//
-def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>;
+def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIM16Alu>, ArithLogic16Defs<1>;
class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [InMips16Mode];
@@ -1380,7 +1380,7 @@ def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> {
let isCall=1, hasDelaySlot=0 in
def JumpLinkReg16:
FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs),
- "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch> {
+ "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], II_JALRC> {
let Defs = [RA];
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index d6ab8a6..82d2c8e 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -186,54 +186,56 @@ class CMP_CONDN_DESC_BASE<string CondStr, string Typestr,
multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
RegisterOperand FGROpnd>{
- def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
- CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
- CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
- CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
- CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
+ let AdditionalPredicates = [NotInMicroMips] in {
+ def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
ISA_MIPS32R6, HARDFLOAT;
- def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
- CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
- CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
- CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
- CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>,
- CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>,
- CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
- CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>,
- CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
+ def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
ISA_MIPS32R6, HARDFLOAT;
- def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>,
- CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>,
- CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
+ def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
ISA_MIPS32R6, HARDFLOAT;
- def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>,
- CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
- ISA_MIPS32R6, HARDFLOAT;
- def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>,
- CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
+ def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
ISA_MIPS32R6, HARDFLOAT;
+ def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
+ CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
+ ISA_MIPS32R6, HARDFLOAT;
+ }
}
//===----------------------------------------------------------------------===//
@@ -557,7 +559,7 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
list<dag> Pattern = [];
- string DecoderMethod = "DecodeCacheOpR6";
+ string DecoderMethod = "DecodeCacheeOp_CacheOpR6";
}
class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
@@ -595,7 +597,7 @@ class LSA_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
list<dag> Pattern = [];
}
-class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>;
+class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>;
class LL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
dag OutOperandList = (outs GPROpnd:$rt);
@@ -685,8 +687,10 @@ def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
def CACHE_R6 : R6MMR6Rel, CACHE_ENC, CACHE_DESC, ISA_MIPS32R6;
-def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def CLO_R6 : R6MMR6Rel, CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6;
def CLZ_R6 : R6MMR6Rel, CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6;
defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
@@ -702,39 +706,51 @@ def LSA_R6 : R6MMR6Rel, LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6;
def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6;
def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
-def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def MOD : R6MMR6Rel, MOD_ENC, MOD_DESC, ISA_MIPS32R6;
def MODU : R6MMR6Rel, MODU_ENC, MODU_DESC, ISA_MIPS32R6;
-def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def MUH : R6MMR6Rel, MUH_ENC, MUH_DESC, ISA_MIPS32R6;
def MUHU : R6MMR6Rel, MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
def MUL_R6 : R6MMR6Rel, MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
def MULU : R6MMR6Rel, MULU_ENC, MULU_DESC, ISA_MIPS32R6;
def NAL; // BAL with rd=0
def PREF_R6 : R6MMR6Rel, PREF_ENC, PREF_DESC, ISA_MIPS32R6;
-def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6;
+let AdditionalPredicates = [NotInMicroMips] in {
def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6;
+}
def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
def SELEQZ : R6MMR6Rel, SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32;
-def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def SELNEZ : R6MMR6Rel, SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32;
-def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT;
-def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT;
+ def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT;
+}
def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
//===----------------------------------------------------------------------===//
@@ -743,7 +759,9 @@ def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
//
//===----------------------------------------------------------------------===//
+let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
+}
def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6;
//===----------------------------------------------------------------------===//
@@ -752,84 +770,78 @@ def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6;
//
//===----------------------------------------------------------------------===//
-// f32 comparisons supported via another comparison
-def : MipsPat<(setone f32:$lhs, f32:$rhs),
- (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seto f32:$lhs, f32:$rhs),
- (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(setune f32:$lhs, f32:$rhs),
- (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setne f32:$lhs, f32:$rhs),
- (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
-
-// f64 comparisons supported via another comparison
-def : MipsPat<(setone f64:$lhs, f64:$rhs),
- (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seto f64:$lhs, f64:$rhs),
- (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(setune f64:$lhs, f64:$rhs),
- (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
-def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
- ISA_MIPS32R6;
-def : MipsPat<(setne f64:$lhs, f64:$rhs),
- (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+// comparisons supported via another comparison
+multiclass Cmp_Pats<ValueType VT, Instruction NOROp, Register ZEROReg> {
+def : MipsPat<(setone VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_UEQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+def : MipsPat<(seto VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_UN_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+def : MipsPat<(setune VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+def : MipsPat<(seteq VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs)>;
+def : MipsPat<(setgt VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LE_"#NAME) VT:$rhs, VT:$lhs)>;
+def : MipsPat<(setge VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LT_"#NAME) VT:$rhs, VT:$lhs)>;
+def : MipsPat<(setlt VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LT_"#NAME) VT:$lhs, VT:$rhs)>;
+def : MipsPat<(setle VT:$lhs, VT:$rhs),
+ (!cast<Instruction>("CMP_LE_"#NAME) VT:$lhs, VT:$rhs)>;
+def : MipsPat<(setne VT:$lhs, VT:$rhs),
+ (NOROp (!cast<Instruction>("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>;
+}
+
+defm S : Cmp_Pats<f32, NOR, ZERO>, ISA_MIPS32R6;
+defm D : Cmp_Pats<f64, NOR, ZERO>, ISA_MIPS32R6;
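
As a rough illustration (not part of the patch), the expansion these Cmp_Pats
patterns encode can be written out in plain C++: CMP.cond.fmt produces an
all-ones/all-zeros mask, and the orderings R6 does not provide directly are
recovered by NOR-ing a provided one against $zero. The helper names below are
hypothetical and exist only for this sketch.

    #include <cstdint>

    // Hypothetical helpers, for illustration only.
    static uint32_t nor(uint32_t a, uint32_t b) { return ~(a | b); }

    // setone (ordered and not equal) is NOT(setueq), i.e.
    //   (NOR (CMP_UEQ_S $lhs, $rhs), ZERO)
    static uint32_t lowerSetONE(uint32_t cmpUeqMask) {
      return nor(cmpUeqMask, 0);
    }

    // setgt/setge need no extra instruction: the patterns simply swap the
    // operands of CMP_LE/CMP_LT.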
// i32 selects
+multiclass SelectInt_Pats<ValueType RC, Instruction OROp, Instruction XORiOp,
+ Instruction SLTiOp, Instruction SLTiuOp,
+ Instruction SELEQZOp, Instruction SELNEZOp,
+ SDPatternOperator imm_type, ValueType Opg> {
+// reg, immz
+def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, RC:$cond), (SELNEZOp RC:$f, RC:$cond))>;
+def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f),
+ (OROp (SELNEZOp RC:$t, RC:$cond), (SELEQZOp RC:$f, RC:$cond))>;
+
+// reg, immZExt16[_64]
+def : MipsPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)),
+ (SELNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>;
+def : MipsPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f),
+ (OROp (SELNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)),
+ (SELEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>;
+
+// reg, immSExt16Plus1
+def : MipsPat<(select (Opg (setgt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))),
+ (SELNEZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>;
+def : MipsPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f),
+ (OROp (SELEQZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))),
+ (SELNEZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>;
+
+def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz),
+ (SELEQZOp RC:$t, RC:$cond)>;
+def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz),
+ (SELNEZOp RC:$t, RC:$cond)>;
+def : MipsPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f),
+ (SELNEZOp RC:$f, RC:$cond)>;
+def : MipsPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f),
+ (SELEQZOp RC:$f, RC:$cond)>;
+}
+
+defm : SelectInt_Pats<i32, OR, XORi, SLTi, SLTiu, SELEQZ, SELNEZ,
+ immZExt16, i32>, ISA_MIPS32R6;
+
def : MipsPat<(select i32:$cond, i32:$t, i32:$f),
- (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f),
- (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f),
- (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
- (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
- (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
+ (OR (SELNEZ i32:$t, i32:$cond),
+ (SELEQZ i32:$f, i32:$cond))>,
ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
- (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
- (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t,
- i32:$f),
- (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))),
- (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>,
- ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)),
- i32:$t, i32:$f),
- (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))),
- (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>,
- ISA_MIPS32R6;
-
def : MipsPat<(select i32:$cond, i32:$t, immz),
- (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz),
- (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz),
- (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+ (SELNEZ i32:$t, i32:$cond)>,
+ ISA_MIPS32R6;
def : MipsPat<(select i32:$cond, immz, i32:$f),
- (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f),
- (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
-def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f),
- (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
+ (SELEQZ i32:$f, i32:$cond)>,
+ ISA_MIPS32R6;
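
For reference, the R6 select idiom the patterns above produce is branchless:
SELEQZ rd, rs, rt yields rs when rt is zero (and 0 otherwise), SELNEZ does the
reverse, and OR merges the two halves. A minimal C++ sketch under those
semantics (the helper names are illustrative, not LLVM APIs):

    #include <cstdint>

    // Illustrative semantics of the R6 instructions used by the patterns above.
    static uint32_t seleqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }
    static uint32_t selnez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }

    // select (cond == 0), t, f  ==>  OR (SELEQZ t, cond), (SELNEZ f, cond)
    static uint32_t selectEqZero(uint32_t cond, uint32_t t, uint32_t f) {
      return seleqz(t, cond) | selnez(f, cond);
    }

    // select cond, t, f (cond as a boolean)  ==>  operands swapped:
    //   OR (SELNEZ t, cond), (SELEQZ f, cond)
    static uint32_t selectBool(uint32_t cond, uint32_t t, uint32_t f) {
      return selnez(t, cond) | seleqz(f, cond);
    }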
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index f917eca..cbdcdd7 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -16,10 +16,6 @@
//===----------------------------------------------------------------------===//
// Unsigned Operand
-def uimm5_64 : Operand<i64> {
- let PrintMethod = "printUnsignedImm";
-}
-
def uimm16_64 : Operand<i64> {
let PrintMethod = "printUnsignedImm";
}
@@ -276,12 +272,20 @@ def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>;
let isCodeGenOnly = 1 in
def RDHWR64 : ReadHardware<GPR64Opnd, HWRegsOpnd>, RDHWR_FM;
-def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>;
-def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>;
-def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ // TODO: Add 'pos + size' constraint check to dext* instructions
+ // DEXT: 0 < pos + size <= 63
+ // DEXTM, DEXTU: 32 < pos + size <= 64
+ def DEXT : ExtBase<"dext", GPR64Opnd, uimm5, uimm5_plus1, MipsExt>,
+ EXT_FM<3>;
+ def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5, uimm5_plus33, MipsExt>,
+ EXT_FM<1>;
+ def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm5_plus32, uimm5_plus1,
+ MipsExt>, EXT_FM<2>;
+}
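
The range constraints named in the TODO above are not enforced by this change;
a hedged sketch of what such a check could look like, with a hypothetical
helper name (nothing like it exists in the tree yet):

    #include <cstdint>

    // DEXT:          0 < pos + size <= 63
    // DEXTM, DEXTU: 32 < pos + size <= 64
    static bool isValidDextRange(unsigned Pos, unsigned Size, bool IsMOrU) {
      uint64_t Sum = uint64_t(Pos) + Size;
      return IsMOrU ? (Sum > 32 && Sum <= 64) : (Sum > 0 && Sum <= 63);
    }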
def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>;
-def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>;
+def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32>, EXT_FM<6>;
def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>;
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
@@ -341,11 +345,11 @@ class SetCC64_I<string opstr, PatFrag cond_op>:
}
class CBranchBitNum<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO, bits<64> shift = 1> :
- InstSE<(outs), (ins RO:$rs, uimm5_64:$p, opnd:$offset),
+ RegisterOperand RO, Operand ImmOp, bits<64> shift = 1> :
+ InstSE<(outs), (ins RO:$rs, ImmOp:$p, opnd:$offset),
!strconcat(opstr, "\t$rs, $p, $offset"),
[(brcond (i32 (cond_op (and RO:$rs, (shl shift, immZExt5_64:$p)), 0)),
- bb:$offset)], IIBranch, FrmI, opstr> {
+ bb:$offset)], II_BBIT, FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -363,14 +367,17 @@ def BADDu : ArithLogicR<"baddu", GPR64Opnd, 1, II_BADDU>,
ADD_FM<0x1c, 0x28>;
// Branch on Bit Clear /+32
-def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd>, BBIT_FM<0x32>;
-def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, 0x100000000>,
+def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd,
+ uimm5_64_report_uimm6>, BBIT_FM<0x32>;
+def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, uimm5_64,
+ 0x100000000>,
BBIT_FM<0x36>;
// Branch on Bit Set /+32
-def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd>, BBIT_FM<0x3a>;
-def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, 0x100000000>,
- BBIT_FM<0x3e>;
+def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd,
+ uimm5_64_report_uimm6>, BBIT_FM<0x3a>;
+def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, uimm5_64,
+ 0x100000000>, BBIT_FM<0x3e>;
// Multiply Doubleword to GPR
let Defs = [HI0, LO0, P0, P1, P2] in
@@ -544,10 +551,25 @@ def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst
(BBIT132 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>;
}
+// Atomic load patterns.
+def : MipsPat<(atomic_load_8 addr:$a), (LB64 addr:$a)>;
+def : MipsPat<(atomic_load_16 addr:$a), (LH64 addr:$a)>;
+def : MipsPat<(atomic_load_32 addr:$a), (LW64 addr:$a)>;
+def : MipsPat<(atomic_load_64 addr:$a), (LD addr:$a)>;
+
+// Atomic store patterns.
+def : MipsPat<(atomic_store_8 addr:$a, GPR64:$v), (SB64 GPR64:$v, addr:$a)>;
+def : MipsPat<(atomic_store_16 addr:$a, GPR64:$v), (SH64 GPR64:$v, addr:$a)>;
+def : MipsPat<(atomic_store_32 addr:$a, GPR64:$v), (SW64 GPR64:$v, addr:$a)>;
+def : MipsPat<(atomic_store_64 addr:$a, GPR64:$v), (SD GPR64:$v, addr:$a)>;
+
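
These patterns are sound because naturally aligned MIPS64 loads and stores are
single-copy atomic, so a relaxed atomic access needs no extra code; stronger
orderings are handled by the separate fence insertion the backend already does.
A small usage sketch (illustrative only):

    #include <atomic>
    #include <cstdint>

    // With the patterns above, a relaxed 64-bit atomic load can be selected
    // straight to an LD, and the store to an SD.
    int64_t loadRelaxed(const std::atomic<int64_t> &V) {
      return V.load(std::memory_order_relaxed);
    }

    void storeRelaxed(std::atomic<int64_t> &V, int64_t X) {
      V.store(X, std::memory_order_relaxed);
    }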
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"move $dst, $src",
+ (OR64 GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>,
+ GPR_64;
+def : MipsInstAlias<"move $dst, $src",
(DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>,
GPR_64;
def : MipsInstAlias<"daddu $rs, $rt, $imm",
@@ -617,6 +639,38 @@ def : MipsInstAlias<"syncw", (SYNC 0x4), 0>;
def : MipsInstAlias<"syncws", (SYNC 0x5), 0>;
}
+// cnMIPS Aliases.
+
+// bbit* with $p 32-63 converted to bbit*32 with $p 0-31
+def : MipsInstAlias<"bbit0 $rs, $p, $offset",
+ (BBIT032 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p,
+ brtarget:$offset), 0>,
+ ASE_CNMIPS;
+def : MipsInstAlias<"bbit1 $rs, $p, $offset",
+ (BBIT132 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p,
+ brtarget:$offset), 0>,
+ ASE_CNMIPS;
+
+// exts with $pos 32-63 converted to exts32 with $pos 0-31
+def : MipsInstAlias<"exts $rt, $rs, $pos, $lenm1",
+ (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+def : MipsInstAlias<"exts $rt, $pos, $lenm1",
+ (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+
+// cins with $pos 32-63 converted to cins32 with $pos 0-31
+def : MipsInstAlias<"cins $rt, $rs, $pos, $lenm1",
+ (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+def : MipsInstAlias<"cins $rt, $pos, $lenm1",
+ (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
+ uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
+ ASE_CNMIPS;
+
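
The normalization these aliases perform is simple: a bit or field position in
[32, 63] selects the "32" form of the instruction with the position reduced by
32, while positions in [0, 31] keep the base encoding. A hypothetical helper
expressing that rule (illustration only, not an LLVM API):

    static unsigned normalizePos(unsigned Pos, bool &UsePlus32Form) {
      UsePlus32Form = Pos >= 32;
      return UsePlus32Form ? Pos - 32 : Pos;
    }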
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -625,3 +679,8 @@ class LoadImmediate64<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
!strconcat(instr_asm, "\t$rt, $imm64")> ;
def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>;
+
+def LoadAddrReg64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins mem:$addr),
+ "dla\t$rt, $addr">;
+def LoadAddrImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins imm64:$imm64),
+ "dla\t$rt, $imm64">;
diff --git a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
index 6b546e8..6f34dbe 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -62,7 +62,7 @@ class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>;
class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>;
class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>;
class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>;
-class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>;
+class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2_plus1>;
class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>;
class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>;
class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>;
@@ -81,10 +81,12 @@ class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>;
//
//===----------------------------------------------------------------------===//
-def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
-def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
-def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
-def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
+ def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
+ def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
+ def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
+}
def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6;
def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6;
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index fdba064..9575293 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -169,12 +169,12 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MCPE.isMachineConstantPoolEntry())
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
- EmitGlobalConstant(MCPE.Val.ConstVal);
+ EmitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal);
return;
}
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
@@ -202,7 +202,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
llvm_unreachable("Pseudo opcode found in EmitInstruction()");
MCInst TmpInst0;
- MCInstLowering.Lower(I, TmpInst0);
+ MCInstLowering.Lower(&*I, TmpInst0);
EmitToStreamer(*OutStreamer, TmpInst0);
} while ((++I != E) && I->isInsideBundle()); // Delay slot check
}
@@ -405,7 +405,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// If this is a landing pad, it isn't a fall through. If it has no preds,
// then nothing falls through to it.
- if (MBB->isLandingPad() || MBB->pred_empty())
+ if (MBB->isEHPad() || MBB->pred_empty())
return false;
// If there isn't exactly one predecessor, it can't be a fall through.
@@ -559,7 +559,6 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
bool closeP = false;
@@ -608,7 +607,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI"
+ O << getDataLayout().getPrivateGlobalPrefix() << "CPI"
<< getFunctionNumber() << "_" << MO.getIndex();
if (MO.getOffset())
O << "+" << MO.getOffset();
@@ -1009,7 +1008,7 @@ void MipsAsmPrinter::EmitFPCallStub(
//
// Mov $18, $31
- EmitInstrRegRegReg(*STI, Mips::ADDu, Mips::S2, Mips::RA, Mips::ZERO);
+ EmitInstrRegRegReg(*STI, Mips::OR, Mips::S2, Mips::RA, Mips::ZERO);
EmitSwapFPIntParams(*STI, Signature->ParamSig, LE, true);
diff --git a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
index b808129..d82063e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -29,22 +29,16 @@ static bool isF128SoftLibCall(const char *CallSym) {
"powl", "rintl", "sinl", "sqrtl",
"truncl"};
- const char *const *End = LibCalls + array_lengthof(LibCalls);
-
// Check that LibCalls is sorted alphabetically.
- MipsTargetLowering::LTStr Comp;
-
-#ifndef NDEBUG
- for (const char *const *I = LibCalls; I < End - 1; ++I)
- assert(Comp(*I, *(I + 1)));
-#endif
-
- return std::binary_search(LibCalls, End, CallSym, Comp);
+ auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; };
+ assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp));
+ return std::binary_search(std::begin(LibCalls), std::end(LibCalls),
+ CallSym, Comp);
}
/// This function returns true if Ty is fp128, {f128} or i128 which was
/// originally a fp128.
-static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
+static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) {
if (Ty->isFP128Ty())
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
index 93e1908..0b4b778 100644
--- a/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/contrib/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -427,3 +427,28 @@ def CSR_Mips16RetHelper :
CalleeSavedRegs<(add V0, V1, FP,
(sequence "A%u", 3, 0), (sequence "S%u", 7, 0),
(sequence "D%u", 15, 10))>;
+
+def CSR_Interrupt_32R6 : CalleeSavedRegs<(add (sequence "A%u", 3, 0),
+ (sequence "S%u", 7, 0),
+ (sequence "V%u", 1, 0),
+ (sequence "T%u", 9, 0),
+ RA, FP, GP, AT)>;
+
+def CSR_Interrupt_32 : CalleeSavedRegs<(add (sequence "A%u", 3, 0),
+ (sequence "S%u", 7, 0),
+ (sequence "V%u", 1, 0),
+ (sequence "T%u", 9, 0),
+ RA, FP, GP, AT, LO0, HI0)>;
+
+def CSR_Interrupt_64R6 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0),
+ (sequence "V%u_64", 1, 0),
+ (sequence "S%u_64", 7, 0),
+ (sequence "T%u_64", 9, 0),
+ RA_64, FP_64, GP_64, AT_64)>;
+
+def CSR_Interrupt_64 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0),
+ (sequence "S%u_64", 7, 0),
+ (sequence "T%u_64", 9, 0),
+ (sequence "V%u_64", 1, 0),
+ RA_64, FP_64, GP_64, AT_64,
+ LO0_64, HI0_64)>;
diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 96553d2..ea8c587 100644
--- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -560,7 +560,7 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// identity mapping of CPI's to CPE's.
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const DataLayout &TD = *MF->getTarget().getDataLayout();
+ const DataLayout &TD = MF->getDataLayout();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
assert(Size >= 4 && "Too small constant pool entry");
@@ -598,12 +598,12 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
/// into the block immediately after it.
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
+ MachineFunction::iterator MBBI = MBB->getIterator();
// Can't fall off end of function.
if (std::next(MBBI) == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = std::next(MBBI);
+ MachineBasicBlock *NextBB = &*std::next(MBBI);
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I == NextBB)
@@ -656,11 +656,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(I);
+ computeBlockSize(&*I);
// Compute block offsets.
- adjustBBOffsetsAfter(MF->begin());
+ adjustBBOffsetsAfter(&MF->front());
// Now go back through the instructions and build up our data structures.
for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
@@ -879,7 +879,7 @@ MachineBasicBlock *MipsConstantIslands::splitBlockBeforeInstr
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MachineFunction::iterator MBBI = ++OrigBB->getIterator();
MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
@@ -967,8 +967,8 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
unsigned NextBlockOffset, NextBlockAlignment;
- MachineFunction::const_iterator NextBlock = Water;
- if (++NextBlock == MF->end()) {
+ MachineFunction::const_iterator NextBlock = ++Water->getIterator();
+ if (NextBlock == MF->end()) {
NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
NextBlockAlignment = 0;
} else {
@@ -1261,7 +1261,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
if (isOffsetInRange(UserOffset, CPEOffset, U)) {
DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
<< format(", expected CPE offset %#x\n", CPEOffset));
- NewMBB = std::next(MachineFunction::iterator(UserMBB));
+ NewMBB = &*++UserMBB->getIterator();
// Add an unconditional branch from UserMBB to fallthrough block. Record
// it for branch lengthening; this new branch will not get out of range,
// but if the preceding conditional branch is out of range, the targets
@@ -1371,8 +1371,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
NewWaterList.insert(NewIsland);
// The new CPE goes before the following block (NewMBB).
- NewMBB = std::next(MachineFunction::iterator(WaterBB));
-
+ NewMBB = &*++WaterBB->getIterator();
} else {
// No water found.
// we first see if a longer form of the instruction could have reached
@@ -1389,7 +1388,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
- MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
+ MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1406,7 +1405,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF->insert(NewMBB, NewIsland);
+ MF->insert(NewMBB->getIterator(), NewIsland);
// Update internal data structures to account for the newly inserted MBB.
updateForInsertedWaterBlock(NewIsland);
@@ -1431,9 +1430,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// Increase the size of the island block to account for the new entry.
BBInfo[NewIsland->getNumber()].Size += Size;
- adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
-
-
+ adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1645,7 +1642,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = &*++MBB->getIterator();
DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
index b5d52ce..f959bd4 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -7,10 +7,30 @@
//
//===----------------------------------------------------------------------===//
+class DspMMRel;
+
+def Dsp2MicroMips : InstrMapping {
+ let FilterClass = "DspMMRel";
+  // Instructions with the same BaseOpcode form a row.
+ let RowFields = ["BaseOpcode"];
+  // Instructions with the same Arch value form a column.
+ let ColFields = ["Arch"];
+  // The key column is the plain DSP instructions.
+ let KeyCol = ["dsp"];
+  // Value columns are the "dsp" and "mmdsp" encodings.
+ let ValueCols = [["dsp"], ["mmdsp"]];
+}
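
Conceptually, this InstrMapping asks TableGen to build a relation keyed by
BaseOpcode with one column per Arch value ("dsp" and "mmdsp"), so a generated
query can translate a DSP opcode into its microMIPS-DSP counterpart. The sketch
below only illustrates that shape; the container and names are hypothetical,
not what TableGen actually emits.

    #include <map>
    #include <string>

    // Hypothetical shape of the relation: BaseOpcode -> opcode per Arch column.
    struct DspRow { unsigned DspOpc; unsigned MMDspOpc; };
    using Dsp2MMTable = std::map<std::string, DspRow>;

    // Hypothetical query: the microMIPS-DSP opcode for BaseOpcode, or -1.
    inline int dsp2MicroMips(const Dsp2MMTable &T, const std::string &BaseOpcode) {
      auto It = T.find(BaseOpcode);
      return It == T.end() ? -1 : static_cast<int>(It->second.MMDspOpc);
    }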
+
def HasDSP : Predicate<"Subtarget->hasDSP()">,
AssemblerPredicate<"FeatureDSP">;
def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">,
AssemblerPredicate<"FeatureDSPR2">;
+def HasDSPR3 : Predicate<"Subtarget->hasDSPR3()">,
+ AssemblerPredicate<"FeatureDSPR3">;
+
+class ISA_DSPR2 {
+ list<Predicate> InsnPredicates = [HasDSPR2];
+}
// Fields.
class Field6<bits<6> val> {
@@ -20,14 +40,22 @@ class Field6<bits<6> val> {
def SPECIAL3_OPCODE : Field6<0b011111>;
def REGIMM_OPCODE : Field6<0b000001>;
-class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
- let Predicates = [HasDSP];
+class DSPInst<string opstr = "">
+ : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+ string BaseOpcode = opstr;
+ string Arch = "dsp";
}
class PseudoDSP<dag outs, dag ins, list<dag> pattern,
- InstrItinClass itin = IIPseudo>:
- MipsPseudo<outs, ins, pattern, itin> {
- let Predicates = [HasDSP];
+ InstrItinClass itin = IIPseudo>
+ : MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
+ let InsnPredicates = [HasDSP];
+}
+
+class DSPInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, PredicateControl {
+ let InsnPredicates = [HasDSP];
}
// ADDU.QB sub-class format.
diff --git a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index d268384..da6f174 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
// ImmLeaf
+def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
+def immZExt7 : ImmLeaf<i32, [{return isUInt<7>(Imm);}]>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
@@ -263,6 +265,7 @@ class ADDU_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -273,6 +276,7 @@ class RADDU_W_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class CMP_EQ_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -293,6 +297,7 @@ class CMP_EQ_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -304,6 +309,7 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
+ string BaseOpcode = instr_asm;
}
class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -314,6 +320,7 @@ class ABSQ_S_PH_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -323,6 +330,7 @@ class REPL_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $imm");
list<dag> Pattern = [(set RO:$rd, (OpNode immPat:$imm))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -332,17 +340,19 @@ class SHLL_QB_R3_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class SHLL_QB_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
SDPatternOperator ImmPat, InstrItinClass itin,
- RegisterOperand RO> {
+ RegisterOperand RO, Operand ImmOpnd> {
dag OutOperandList = (outs RO:$rd);
- dag InOperandList = (ins RO:$rt, uimm16:$rs_sa);
+ dag InOperandList = (ins RO:$rt, ImmOpnd:$rs_sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa");
list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))];
InstrItinClass Itinerary = itin;
bit hasSideEffects = 1;
+ string BaseOpcode = instr_asm;
}
class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -353,6 +363,7 @@ class LX_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))];
InstrItinClass Itinerary = itin;
bit mayLoad = 1;
+ string BaseOpcode = instr_asm;
}
class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -363,17 +374,19 @@ class ADDUH_QB_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- SDPatternOperator ImmOp, InstrItinClass itin> {
+ Operand ImmOp, SDPatternOperator Imm, InstrItinClass itin> {
dag OutOperandList = (outs GPR32Opnd:$rt);
- dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$sa, GPR32Opnd:$src);
+ dag InOperandList = (ins GPR32Opnd:$rs, ImmOp:$sa, GPR32Opnd:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
list<dag> Pattern = [(set GPR32Opnd:$rt,
- (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))];
+ (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, Imm:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
+ string BaseOpcode = instr_asm;
}
class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -382,6 +395,7 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -390,15 +404,17 @@ class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm16:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
dag OutOperandList = (outs ACC64DSPOpnd:$ac);
- dag InOperandList = (ins simm16:$shift, ACC64DSPOpnd:$acin);
+ dag InOperandList = (ins simm6:$shift, ACC64DSPOpnd:$acin);
string AsmString = !strconcat(instr_asm, "\t$ac, $shift");
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -408,6 +424,7 @@ class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -417,6 +434,7 @@ class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -426,15 +444,17 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass itin> {
dag OutOperandList = (outs);
- dag InOperandList = (ins GPR32Opnd:$rs, uimm16:$mask);
+ dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
@@ -444,6 +464,7 @@ class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
list<dag> Pattern = [(set ACC64DSPOpnd:$ac,
(OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -454,6 +475,7 @@ class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))];
InstrItinClass Itinerary = itin;
bit isCommutable = 1;
+ string BaseOpcode = instr_asm;
}
class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -465,6 +487,7 @@ class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
(OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))];
InstrItinClass Itinerary = itin;
string Constraints = "$acin = $ac";
+ string BaseOpcode = instr_asm;
}
class MFHI_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
@@ -474,6 +497,7 @@ class MFHI_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
string AsmString = !strconcat(instr_asm, "\t$rd, $ac");
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode RO:$ac))];
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin> {
@@ -481,6 +505,7 @@ class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO, InstrItinClass itin>
dag InOperandList = (ins GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
InstrItinClass Itinerary = itin;
+ string BaseOpcode = instr_asm;
}
class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
@@ -506,6 +531,7 @@ class INSV_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
+ string BaseOpcode = instr_asm;
}
//===----------------------------------------------------------------------===//
@@ -639,7 +665,7 @@ class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra",
// Shift
class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3,
- NoItinerary, DSPROpnd>,
+ NoItinerary, DSPROpnd, uimm3>,
Defs<[DSPOutFlag22]>;
class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
@@ -647,13 +673,13 @@ class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb,
Defs<[DSPOutFlag22]>;
class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm3>;
class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb,
NoItinerary, DSPROpnd>;
class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4,
- NoItinerary, DSPROpnd>,
+ NoItinerary, DSPROpnd, uimm4>,
Defs<[DSPOutFlag22]>;
class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
@@ -661,7 +687,8 @@ class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph,
Defs<[DSPOutFlag22]>;
class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph,
- immZExt4, NoItinerary, DSPROpnd>,
+ immZExt4, NoItinerary, DSPROpnd,
+ uimm4>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
@@ -669,19 +696,21 @@ class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph,
Defs<[DSPOutFlag22]>;
class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm4>;
class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph,
NoItinerary, DSPROpnd>;
class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph,
- immZExt4, NoItinerary, DSPROpnd>;
+ immZExt4, NoItinerary, DSPROpnd,
+ uimm4>;
class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph,
NoItinerary, DSPROpnd>;
class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w,
- immZExt5, NoItinerary, GPR32Opnd>,
+ immZExt5, NoItinerary, GPR32Opnd,
+ uimm5>,
Defs<[DSPOutFlag22]>;
class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
@@ -689,7 +718,8 @@ class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w,
Defs<[DSPOutFlag22]>;
class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w,
- immZExt5, NoItinerary, GPR32Opnd>;
+ immZExt5, NoItinerary, GPR32Opnd,
+ uimm5>;
class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w,
NoItinerary, GPR32Opnd>;
@@ -1039,32 +1069,33 @@ class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w",
// Shift
class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm3>;
class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb,
NoItinerary, DSPROpnd>;
class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb,
- immZExt3, NoItinerary, DSPROpnd>;
+ immZExt3, NoItinerary, DSPROpnd,
+ uimm3>;
class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb,
NoItinerary, DSPROpnd>;
class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4,
- NoItinerary, DSPROpnd>;
+ NoItinerary, DSPROpnd, uimm4>;
class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
NoItinerary, DSPROpnd>;
// Misc
-class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5,
+class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5,
NoItinerary>;
-class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2,
+class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2,
NoItinerary>;
-class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5,
- NoItinerary>;
+class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
+ immZExt5, NoItinerary>;
// Pseudos.
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
@@ -1072,80 +1103,80 @@ def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
// Instruction defs.
// MIPS DSP Rev 1
-def ADDU_QB : ADDU_QB_ENC, ADDU_QB_DESC;
-def ADDU_S_QB : ADDU_S_QB_ENC, ADDU_S_QB_DESC;
-def SUBU_QB : SUBU_QB_ENC, SUBU_QB_DESC;
-def SUBU_S_QB : SUBU_S_QB_ENC, SUBU_S_QB_DESC;
-def ADDQ_PH : ADDQ_PH_ENC, ADDQ_PH_DESC;
-def ADDQ_S_PH : ADDQ_S_PH_ENC, ADDQ_S_PH_DESC;
-def SUBQ_PH : SUBQ_PH_ENC, SUBQ_PH_DESC;
-def SUBQ_S_PH : SUBQ_S_PH_ENC, SUBQ_S_PH_DESC;
-def ADDQ_S_W : ADDQ_S_W_ENC, ADDQ_S_W_DESC;
-def SUBQ_S_W : SUBQ_S_W_ENC, SUBQ_S_W_DESC;
-def ADDSC : ADDSC_ENC, ADDSC_DESC;
-def ADDWC : ADDWC_ENC, ADDWC_DESC;
+def ADDU_QB : DspMMRel, ADDU_QB_ENC, ADDU_QB_DESC;
+def ADDU_S_QB : DspMMRel, ADDU_S_QB_ENC, ADDU_S_QB_DESC;
+def SUBU_QB : DspMMRel, SUBU_QB_ENC, SUBU_QB_DESC;
+def SUBU_S_QB : DspMMRel, SUBU_S_QB_ENC, SUBU_S_QB_DESC;
+def ADDQ_PH : DspMMRel, ADDQ_PH_ENC, ADDQ_PH_DESC;
+def ADDQ_S_PH : DspMMRel, ADDQ_S_PH_ENC, ADDQ_S_PH_DESC;
+def SUBQ_PH : DspMMRel, SUBQ_PH_ENC, SUBQ_PH_DESC;
+def SUBQ_S_PH : DspMMRel, SUBQ_S_PH_ENC, SUBQ_S_PH_DESC;
+def ADDQ_S_W : DspMMRel, ADDQ_S_W_ENC, ADDQ_S_W_DESC;
+def SUBQ_S_W : DspMMRel, SUBQ_S_W_ENC, SUBQ_S_W_DESC;
+def ADDSC : DspMMRel, ADDSC_ENC, ADDSC_DESC;
+def ADDWC : DspMMRel, ADDWC_ENC, ADDWC_DESC;
def MODSUB : MODSUB_ENC, MODSUB_DESC;
-def RADDU_W_QB : RADDU_W_QB_ENC, RADDU_W_QB_DESC;
-def ABSQ_S_PH : ABSQ_S_PH_ENC, ABSQ_S_PH_DESC;
-def ABSQ_S_W : ABSQ_S_W_ENC, ABSQ_S_W_DESC;
-def PRECRQ_QB_PH : PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC;
-def PRECRQ_PH_W : PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC;
-def PRECRQ_RS_PH_W : PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC;
-def PRECRQU_S_QB_PH : PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC;
-def PRECEQ_W_PHL : PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC;
-def PRECEQ_W_PHR : PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC;
-def PRECEQU_PH_QBL : PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC;
-def PRECEQU_PH_QBR : PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC;
-def PRECEQU_PH_QBLA : PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC;
-def PRECEQU_PH_QBRA : PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC;
-def PRECEU_PH_QBL : PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC;
-def PRECEU_PH_QBR : PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC;
-def PRECEU_PH_QBLA : PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC;
-def PRECEU_PH_QBRA : PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC;
-def SHLL_QB : SHLL_QB_ENC, SHLL_QB_DESC;
-def SHLLV_QB : SHLLV_QB_ENC, SHLLV_QB_DESC;
-def SHRL_QB : SHRL_QB_ENC, SHRL_QB_DESC;
-def SHRLV_QB : SHRLV_QB_ENC, SHRLV_QB_DESC;
-def SHLL_PH : SHLL_PH_ENC, SHLL_PH_DESC;
-def SHLLV_PH : SHLLV_PH_ENC, SHLLV_PH_DESC;
-def SHLL_S_PH : SHLL_S_PH_ENC, SHLL_S_PH_DESC;
-def SHLLV_S_PH : SHLLV_S_PH_ENC, SHLLV_S_PH_DESC;
-def SHRA_PH : SHRA_PH_ENC, SHRA_PH_DESC;
-def SHRAV_PH : SHRAV_PH_ENC, SHRAV_PH_DESC;
-def SHRA_R_PH : SHRA_R_PH_ENC, SHRA_R_PH_DESC;
-def SHRAV_R_PH : SHRAV_R_PH_ENC, SHRAV_R_PH_DESC;
-def SHLL_S_W : SHLL_S_W_ENC, SHLL_S_W_DESC;
-def SHLLV_S_W : SHLLV_S_W_ENC, SHLLV_S_W_DESC;
-def SHRA_R_W : SHRA_R_W_ENC, SHRA_R_W_DESC;
-def SHRAV_R_W : SHRAV_R_W_ENC, SHRAV_R_W_DESC;
-def MULEU_S_PH_QBL : MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC;
-def MULEU_S_PH_QBR : MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC;
-def MULEQ_S_W_PHL : MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC;
-def MULEQ_S_W_PHR : MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC;
-def MULQ_RS_PH : MULQ_RS_PH_ENC, MULQ_RS_PH_DESC;
+def RADDU_W_QB : DspMMRel, RADDU_W_QB_ENC, RADDU_W_QB_DESC;
+def ABSQ_S_PH : DspMMRel, ABSQ_S_PH_ENC, ABSQ_S_PH_DESC;
+def ABSQ_S_W : DspMMRel, ABSQ_S_W_ENC, ABSQ_S_W_DESC;
+def PRECRQ_QB_PH : DspMMRel, PRECRQ_QB_PH_ENC, PRECRQ_QB_PH_DESC;
+def PRECRQ_PH_W : DspMMRel, PRECRQ_PH_W_ENC, PRECRQ_PH_W_DESC;
+def PRECRQ_RS_PH_W : DspMMRel, PRECRQ_RS_PH_W_ENC, PRECRQ_RS_PH_W_DESC;
+def PRECRQU_S_QB_PH : DspMMRel, PRECRQU_S_QB_PH_ENC, PRECRQU_S_QB_PH_DESC;
+def PRECEQ_W_PHL : DspMMRel, PRECEQ_W_PHL_ENC, PRECEQ_W_PHL_DESC;
+def PRECEQ_W_PHR : DspMMRel, PRECEQ_W_PHR_ENC, PRECEQ_W_PHR_DESC;
+def PRECEQU_PH_QBL : DspMMRel, PRECEQU_PH_QBL_ENC, PRECEQU_PH_QBL_DESC;
+def PRECEQU_PH_QBR : DspMMRel, PRECEQU_PH_QBR_ENC, PRECEQU_PH_QBR_DESC;
+def PRECEQU_PH_QBLA : DspMMRel, PRECEQU_PH_QBLA_ENC, PRECEQU_PH_QBLA_DESC;
+def PRECEQU_PH_QBRA : DspMMRel, PRECEQU_PH_QBRA_ENC, PRECEQU_PH_QBRA_DESC;
+def PRECEU_PH_QBL : DspMMRel, PRECEU_PH_QBL_ENC, PRECEU_PH_QBL_DESC;
+def PRECEU_PH_QBR : DspMMRel, PRECEU_PH_QBR_ENC, PRECEU_PH_QBR_DESC;
+def PRECEU_PH_QBLA : DspMMRel, PRECEU_PH_QBLA_ENC, PRECEU_PH_QBLA_DESC;
+def PRECEU_PH_QBRA : DspMMRel, PRECEU_PH_QBRA_ENC, PRECEU_PH_QBRA_DESC;
+def SHLL_QB : DspMMRel, SHLL_QB_ENC, SHLL_QB_DESC;
+def SHLLV_QB : DspMMRel, SHLLV_QB_ENC, SHLLV_QB_DESC;
+def SHRL_QB : DspMMRel, SHRL_QB_ENC, SHRL_QB_DESC;
+def SHRLV_QB : DspMMRel, SHRLV_QB_ENC, SHRLV_QB_DESC;
+def SHLL_PH : DspMMRel, SHLL_PH_ENC, SHLL_PH_DESC;
+def SHLLV_PH : DspMMRel, SHLLV_PH_ENC, SHLLV_PH_DESC;
+def SHLL_S_PH : DspMMRel, SHLL_S_PH_ENC, SHLL_S_PH_DESC;
+def SHLLV_S_PH : DspMMRel, SHLLV_S_PH_ENC, SHLLV_S_PH_DESC;
+def SHRA_PH : DspMMRel, SHRA_PH_ENC, SHRA_PH_DESC;
+def SHRAV_PH : DspMMRel, SHRAV_PH_ENC, SHRAV_PH_DESC;
+def SHRA_R_PH : DspMMRel, SHRA_R_PH_ENC, SHRA_R_PH_DESC;
+def SHRAV_R_PH : DspMMRel, SHRAV_R_PH_ENC, SHRAV_R_PH_DESC;
+def SHLL_S_W : DspMMRel, SHLL_S_W_ENC, SHLL_S_W_DESC;
+def SHLLV_S_W : DspMMRel, SHLLV_S_W_ENC, SHLLV_S_W_DESC;
+def SHRA_R_W : DspMMRel, SHRA_R_W_ENC, SHRA_R_W_DESC;
+def SHRAV_R_W : DspMMRel, SHRAV_R_W_ENC, SHRAV_R_W_DESC;
+def MULEU_S_PH_QBL : DspMMRel, MULEU_S_PH_QBL_ENC, MULEU_S_PH_QBL_DESC;
+def MULEU_S_PH_QBR : DspMMRel, MULEU_S_PH_QBR_ENC, MULEU_S_PH_QBR_DESC;
+def MULEQ_S_W_PHL : DspMMRel, MULEQ_S_W_PHL_ENC, MULEQ_S_W_PHL_DESC;
+def MULEQ_S_W_PHR : DspMMRel, MULEQ_S_W_PHR_ENC, MULEQ_S_W_PHR_DESC;
+def MULQ_RS_PH : DspMMRel, MULQ_RS_PH_ENC, MULQ_RS_PH_DESC;
def MULSAQ_S_W_PH : MULSAQ_S_W_PH_ENC, MULSAQ_S_W_PH_DESC;
-def MAQ_S_W_PHL : MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
-def MAQ_S_W_PHR : MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
-def MAQ_SA_W_PHL : MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
-def MAQ_SA_W_PHR : MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
-def MFHI_DSP : MFHI_ENC, MFHI_DESC;
-def MFLO_DSP : MFLO_ENC, MFLO_DESC;
-def MTHI_DSP : MTHI_ENC, MTHI_DESC;
-def MTLO_DSP : MTLO_ENC, MTLO_DESC;
-def DPAU_H_QBL : DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
-def DPAU_H_QBR : DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
-def DPSU_H_QBL : DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
-def DPSU_H_QBR : DPSU_H_QBR_ENC, DPSU_H_QBR_DESC;
-def DPAQ_S_W_PH : DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC;
-def DPSQ_S_W_PH : DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC;
-def DPAQ_SA_L_W : DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC;
-def DPSQ_SA_L_W : DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC;
-def MULT_DSP : MULT_DSP_ENC, MULT_DSP_DESC;
-def MULTU_DSP : MULTU_DSP_ENC, MULTU_DSP_DESC;
-def MADD_DSP : MADD_DSP_ENC, MADD_DSP_DESC;
-def MADDU_DSP : MADDU_DSP_ENC, MADDU_DSP_DESC;
-def MSUB_DSP : MSUB_DSP_ENC, MSUB_DSP_DESC;
-def MSUBU_DSP : MSUBU_DSP_ENC, MSUBU_DSP_DESC;
+def MAQ_S_W_PHL : DspMMRel, MAQ_S_W_PHL_ENC, MAQ_S_W_PHL_DESC;
+def MAQ_S_W_PHR : DspMMRel, MAQ_S_W_PHR_ENC, MAQ_S_W_PHR_DESC;
+def MAQ_SA_W_PHL : DspMMRel, MAQ_SA_W_PHL_ENC, MAQ_SA_W_PHL_DESC;
+def MAQ_SA_W_PHR : DspMMRel, MAQ_SA_W_PHR_ENC, MAQ_SA_W_PHR_DESC;
+def MFHI_DSP : DspMMRel, MFHI_ENC, MFHI_DESC;
+def MFLO_DSP : DspMMRel, MFLO_ENC, MFLO_DESC;
+def MTHI_DSP : DspMMRel, MTHI_ENC, MTHI_DESC;
+def MTLO_DSP : DspMMRel, MTLO_ENC, MTLO_DESC;
+def DPAU_H_QBL : DspMMRel, DPAU_H_QBL_ENC, DPAU_H_QBL_DESC;
+def DPAU_H_QBR : DspMMRel, DPAU_H_QBR_ENC, DPAU_H_QBR_DESC;
+def DPSU_H_QBL : DspMMRel, DPSU_H_QBL_ENC, DPSU_H_QBL_DESC;
+def DPSU_H_QBR : DspMMRel, DPSU_H_QBR_ENC, DPSU_H_QBR_DESC;
+def DPAQ_S_W_PH : DspMMRel, DPAQ_S_W_PH_ENC, DPAQ_S_W_PH_DESC;
+def DPSQ_S_W_PH : DspMMRel, DPSQ_S_W_PH_ENC, DPSQ_S_W_PH_DESC;
+def DPAQ_SA_L_W : DspMMRel, DPAQ_SA_L_W_ENC, DPAQ_SA_L_W_DESC;
+def DPSQ_SA_L_W : DspMMRel, DPSQ_SA_L_W_ENC, DPSQ_SA_L_W_DESC;
+def MULT_DSP : DspMMRel, MULT_DSP_ENC, MULT_DSP_DESC;
+def MULTU_DSP : DspMMRel, MULTU_DSP_ENC, MULTU_DSP_DESC;
+def MADD_DSP : DspMMRel, MADD_DSP_ENC, MADD_DSP_DESC;
+def MADDU_DSP : DspMMRel, MADDU_DSP_ENC, MADDU_DSP_DESC;
+def MSUB_DSP : DspMMRel, MSUB_DSP_ENC, MSUB_DSP_DESC;
+def MSUBU_DSP : DspMMRel, MSUBU_DSP_ENC, MSUBU_DSP_DESC;
def CMPU_EQ_QB : CMPU_EQ_QB_ENC, CMPU_EQ_QB_DESC;
def CMPU_LT_QB : CMPU_LT_QB_ENC, CMPU_LT_QB_DESC;
def CMPU_LE_QB : CMPU_LE_QB_ENC, CMPU_LE_QB_DESC;
@@ -1156,87 +1187,85 @@ def CMP_EQ_PH : CMP_EQ_PH_ENC, CMP_EQ_PH_DESC;
def CMP_LT_PH : CMP_LT_PH_ENC, CMP_LT_PH_DESC;
def CMP_LE_PH : CMP_LE_PH_ENC, CMP_LE_PH_DESC;
def BITREV : BITREV_ENC, BITREV_DESC;
-def PACKRL_PH : PACKRL_PH_ENC, PACKRL_PH_DESC;
-def REPL_QB : REPL_QB_ENC, REPL_QB_DESC;
-def REPL_PH : REPL_PH_ENC, REPL_PH_DESC;
-def REPLV_QB : REPLV_QB_ENC, REPLV_QB_DESC;
-def REPLV_PH : REPLV_PH_ENC, REPLV_PH_DESC;
-def PICK_QB : PICK_QB_ENC, PICK_QB_DESC;
-def PICK_PH : PICK_PH_ENC, PICK_PH_DESC;
-def LWX : LWX_ENC, LWX_DESC;
-def LHX : LHX_ENC, LHX_DESC;
-def LBUX : LBUX_ENC, LBUX_DESC;
+def PACKRL_PH : DspMMRel, PACKRL_PH_ENC, PACKRL_PH_DESC;
+def REPL_QB : DspMMRel, REPL_QB_ENC, REPL_QB_DESC;
+def REPL_PH : DspMMRel, REPL_PH_ENC, REPL_PH_DESC;
+def REPLV_QB : DspMMRel, REPLV_QB_ENC, REPLV_QB_DESC;
+def REPLV_PH : DspMMRel, REPLV_PH_ENC, REPLV_PH_DESC;
+def PICK_QB : DspMMRel, PICK_QB_ENC, PICK_QB_DESC;
+def PICK_PH : DspMMRel, PICK_PH_ENC, PICK_PH_DESC;
+def LWX : DspMMRel, LWX_ENC, LWX_DESC;
+def LHX : DspMMRel, LHX_ENC, LHX_DESC;
+def LBUX : DspMMRel, LBUX_ENC, LBUX_DESC;
def BPOSGE32 : BPOSGE32_ENC, BPOSGE32_DESC;
-def INSV : INSV_ENC, INSV_DESC;
-def EXTP : EXTP_ENC, EXTP_DESC;
-def EXTPV : EXTPV_ENC, EXTPV_DESC;
-def EXTPDP : EXTPDP_ENC, EXTPDP_DESC;
-def EXTPDPV : EXTPDPV_ENC, EXTPDPV_DESC;
-def EXTR_W : EXTR_W_ENC, EXTR_W_DESC;
-def EXTRV_W : EXTRV_W_ENC, EXTRV_W_DESC;
-def EXTR_R_W : EXTR_R_W_ENC, EXTR_R_W_DESC;
-def EXTRV_R_W : EXTRV_R_W_ENC, EXTRV_R_W_DESC;
-def EXTR_RS_W : EXTR_RS_W_ENC, EXTR_RS_W_DESC;
-def EXTRV_RS_W : EXTRV_RS_W_ENC, EXTRV_RS_W_DESC;
-def EXTR_S_H : EXTR_S_H_ENC, EXTR_S_H_DESC;
-def EXTRV_S_H : EXTRV_S_H_ENC, EXTRV_S_H_DESC;
-def SHILO : SHILO_ENC, SHILO_DESC;
-def SHILOV : SHILOV_ENC, SHILOV_DESC;
-def MTHLIP : MTHLIP_ENC, MTHLIP_DESC;
-def RDDSP : RDDSP_ENC, RDDSP_DESC;
-def WRDSP : WRDSP_ENC, WRDSP_DESC;
+def INSV : DspMMRel, INSV_ENC, INSV_DESC;
+def EXTP : DspMMRel, EXTP_ENC, EXTP_DESC;
+def EXTPV : DspMMRel, EXTPV_ENC, EXTPV_DESC;
+def EXTPDP : DspMMRel, EXTPDP_ENC, EXTPDP_DESC;
+def EXTPDPV : DspMMRel, EXTPDPV_ENC, EXTPDPV_DESC;
+def EXTR_W : DspMMRel, EXTR_W_ENC, EXTR_W_DESC;
+def EXTRV_W : DspMMRel, EXTRV_W_ENC, EXTRV_W_DESC;
+def EXTR_R_W : DspMMRel, EXTR_R_W_ENC, EXTR_R_W_DESC;
+def EXTRV_R_W : DspMMRel, EXTRV_R_W_ENC, EXTRV_R_W_DESC;
+def EXTR_RS_W : DspMMRel, EXTR_RS_W_ENC, EXTR_RS_W_DESC;
+def EXTRV_RS_W : DspMMRel, EXTRV_RS_W_ENC, EXTRV_RS_W_DESC;
+def EXTR_S_H : DspMMRel, EXTR_S_H_ENC, EXTR_S_H_DESC;
+def EXTRV_S_H : DspMMRel, EXTRV_S_H_ENC, EXTRV_S_H_DESC;
+def SHILO : DspMMRel, SHILO_ENC, SHILO_DESC;
+def SHILOV : DspMMRel, SHILOV_ENC, SHILOV_DESC;
+def MTHLIP : DspMMRel, MTHLIP_ENC, MTHLIP_DESC;
+def RDDSP : DspMMRel, RDDSP_ENC, RDDSP_DESC;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def WRDSP : WRDSP_ENC, WRDSP_DESC;
+}
// MIPS DSP Rev 2
-let Predicates = [HasDSPR2] in {
-
-def ADDU_PH : ADDU_PH_ENC, ADDU_PH_DESC;
-def ADDU_S_PH : ADDU_S_PH_ENC, ADDU_S_PH_DESC;
-def SUBU_PH : SUBU_PH_ENC, SUBU_PH_DESC;
-def SUBU_S_PH : SUBU_S_PH_ENC, SUBU_S_PH_DESC;
-def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC;
-def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC;
-def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC;
-def ABSQ_S_QB : ABSQ_S_QB_ENC, ABSQ_S_QB_DESC;
-def ADDUH_QB : ADDUH_QB_ENC, ADDUH_QB_DESC;
-def ADDUH_R_QB : ADDUH_R_QB_ENC, ADDUH_R_QB_DESC;
-def SUBUH_QB : SUBUH_QB_ENC, SUBUH_QB_DESC;
-def SUBUH_R_QB : SUBUH_R_QB_ENC, SUBUH_R_QB_DESC;
-def ADDQH_PH : ADDQH_PH_ENC, ADDQH_PH_DESC;
-def ADDQH_R_PH : ADDQH_R_PH_ENC, ADDQH_R_PH_DESC;
-def SUBQH_PH : SUBQH_PH_ENC, SUBQH_PH_DESC;
-def SUBQH_R_PH : SUBQH_R_PH_ENC, SUBQH_R_PH_DESC;
-def ADDQH_W : ADDQH_W_ENC, ADDQH_W_DESC;
-def ADDQH_R_W : ADDQH_R_W_ENC, ADDQH_R_W_DESC;
-def SUBQH_W : SUBQH_W_ENC, SUBQH_W_DESC;
-def SUBQH_R_W : SUBQH_R_W_ENC, SUBQH_R_W_DESC;
-def MUL_PH : MUL_PH_ENC, MUL_PH_DESC;
-def MUL_S_PH : MUL_S_PH_ENC, MUL_S_PH_DESC;
-def MULQ_S_W : MULQ_S_W_ENC, MULQ_S_W_DESC;
-def MULQ_RS_W : MULQ_RS_W_ENC, MULQ_RS_W_DESC;
-def MULQ_S_PH : MULQ_S_PH_ENC, MULQ_S_PH_DESC;
-def DPA_W_PH : DPA_W_PH_ENC, DPA_W_PH_DESC;
-def DPS_W_PH : DPS_W_PH_ENC, DPS_W_PH_DESC;
-def DPAQX_S_W_PH : DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC;
-def DPAQX_SA_W_PH : DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC;
-def DPAX_W_PH : DPAX_W_PH_ENC, DPAX_W_PH_DESC;
-def DPSX_W_PH : DPSX_W_PH_ENC, DPSX_W_PH_DESC;
-def DPSQX_S_W_PH : DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC;
-def DPSQX_SA_W_PH : DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC;
-def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC;
-def PRECR_QB_PH : PRECR_QB_PH_ENC, PRECR_QB_PH_DESC;
-def PRECR_SRA_PH_W : PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC;
-def PRECR_SRA_R_PH_W : PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC;
-def SHRA_QB : SHRA_QB_ENC, SHRA_QB_DESC;
-def SHRAV_QB : SHRAV_QB_ENC, SHRAV_QB_DESC;
-def SHRA_R_QB : SHRA_R_QB_ENC, SHRA_R_QB_DESC;
-def SHRAV_R_QB : SHRAV_R_QB_ENC, SHRAV_R_QB_DESC;
-def SHRL_PH : SHRL_PH_ENC, SHRL_PH_DESC;
-def SHRLV_PH : SHRLV_PH_ENC, SHRLV_PH_DESC;
-def APPEND : APPEND_ENC, APPEND_DESC;
-def BALIGN : BALIGN_ENC, BALIGN_DESC;
-def PREPEND : PREPEND_ENC, PREPEND_DESC;
-
-}
+def ADDU_PH : DspMMRel, ADDU_PH_ENC, ADDU_PH_DESC, ISA_DSPR2;
+def ADDU_S_PH : DspMMRel, ADDU_S_PH_ENC, ADDU_S_PH_DESC, ISA_DSPR2;
+def SUBU_PH : DspMMRel, SUBU_PH_ENC, SUBU_PH_DESC, ISA_DSPR2;
+def SUBU_S_PH : DspMMRel, SUBU_S_PH_ENC, SUBU_S_PH_DESC, ISA_DSPR2;
+def CMPGDU_EQ_QB : CMPGDU_EQ_QB_ENC, CMPGDU_EQ_QB_DESC, ISA_DSPR2;
+def CMPGDU_LT_QB : CMPGDU_LT_QB_ENC, CMPGDU_LT_QB_DESC, ISA_DSPR2;
+def CMPGDU_LE_QB : CMPGDU_LE_QB_ENC, CMPGDU_LE_QB_DESC, ISA_DSPR2;
+def ABSQ_S_QB : DspMMRel, ABSQ_S_QB_ENC, ABSQ_S_QB_DESC, ISA_DSPR2;
+def ADDUH_QB : DspMMRel, ADDUH_QB_ENC, ADDUH_QB_DESC, ISA_DSPR2;
+def ADDUH_R_QB : DspMMRel, ADDUH_R_QB_ENC, ADDUH_R_QB_DESC, ISA_DSPR2;
+def SUBUH_QB : DspMMRel, SUBUH_QB_ENC, SUBUH_QB_DESC, ISA_DSPR2;
+def SUBUH_R_QB : DspMMRel, SUBUH_R_QB_ENC, SUBUH_R_QB_DESC, ISA_DSPR2;
+def ADDQH_PH : DspMMRel, ADDQH_PH_ENC, ADDQH_PH_DESC, ISA_DSPR2;
+def ADDQH_R_PH : DspMMRel, ADDQH_R_PH_ENC, ADDQH_R_PH_DESC, ISA_DSPR2;
+def SUBQH_PH : DspMMRel, SUBQH_PH_ENC, SUBQH_PH_DESC, ISA_DSPR2;
+def SUBQH_R_PH : DspMMRel, SUBQH_R_PH_ENC, SUBQH_R_PH_DESC, ISA_DSPR2;
+def ADDQH_W : DspMMRel, ADDQH_W_ENC, ADDQH_W_DESC, ISA_DSPR2;
+def ADDQH_R_W : DspMMRel, ADDQH_R_W_ENC, ADDQH_R_W_DESC, ISA_DSPR2;
+def SUBQH_W : DspMMRel, SUBQH_W_ENC, SUBQH_W_DESC, ISA_DSPR2;
+def SUBQH_R_W : DspMMRel, SUBQH_R_W_ENC, SUBQH_R_W_DESC, ISA_DSPR2;
+def MUL_PH : DspMMRel, MUL_PH_ENC, MUL_PH_DESC, ISA_DSPR2;
+def MUL_S_PH : DspMMRel, MUL_S_PH_ENC, MUL_S_PH_DESC, ISA_DSPR2;
+def MULQ_S_W : DspMMRel, MULQ_S_W_ENC, MULQ_S_W_DESC, ISA_DSPR2;
+def MULQ_RS_W : DspMMRel, MULQ_RS_W_ENC, MULQ_RS_W_DESC, ISA_DSPR2;
+def MULQ_S_PH : DspMMRel, MULQ_S_PH_ENC, MULQ_S_PH_DESC, ISA_DSPR2;
+def DPA_W_PH : DspMMRel, DPA_W_PH_ENC, DPA_W_PH_DESC, ISA_DSPR2;
+def DPS_W_PH : DspMMRel, DPS_W_PH_ENC, DPS_W_PH_DESC, ISA_DSPR2;
+def DPAQX_S_W_PH : DspMMRel, DPAQX_S_W_PH_ENC, DPAQX_S_W_PH_DESC, ISA_DSPR2;
+def DPAQX_SA_W_PH : DspMMRel, DPAQX_SA_W_PH_ENC, DPAQX_SA_W_PH_DESC, ISA_DSPR2;
+def DPAX_W_PH : DspMMRel, DPAX_W_PH_ENC, DPAX_W_PH_DESC, ISA_DSPR2;
+def DPSX_W_PH : DspMMRel, DPSX_W_PH_ENC, DPSX_W_PH_DESC, ISA_DSPR2;
+def DPSQX_S_W_PH : DspMMRel, DPSQX_S_W_PH_ENC, DPSQX_S_W_PH_DESC, ISA_DSPR2;
+def DPSQX_SA_W_PH : DspMMRel, DPSQX_SA_W_PH_ENC, DPSQX_SA_W_PH_DESC, ISA_DSPR2;
+def MULSA_W_PH : MULSA_W_PH_ENC, MULSA_W_PH_DESC, ISA_DSPR2;
+def PRECR_QB_PH : DspMMRel, PRECR_QB_PH_ENC, PRECR_QB_PH_DESC, ISA_DSPR2;
+def PRECR_SRA_PH_W : DspMMRel, PRECR_SRA_PH_W_ENC, PRECR_SRA_PH_W_DESC, ISA_DSPR2;
+def PRECR_SRA_R_PH_W : DspMMRel, PRECR_SRA_R_PH_W_ENC, PRECR_SRA_R_PH_W_DESC, ISA_DSPR2;
+def SHRA_QB : DspMMRel, SHRA_QB_ENC, SHRA_QB_DESC, ISA_DSPR2;
+def SHRAV_QB : DspMMRel, SHRAV_QB_ENC, SHRAV_QB_DESC, ISA_DSPR2;
+def SHRA_R_QB : DspMMRel, SHRA_R_QB_ENC, SHRA_R_QB_DESC, ISA_DSPR2;
+def SHRAV_R_QB : DspMMRel, SHRAV_R_QB_ENC, SHRAV_R_QB_DESC, ISA_DSPR2;
+def SHRL_PH : DspMMRel, SHRL_PH_ENC, SHRL_PH_DESC, ISA_DSPR2;
+def SHRLV_PH : DspMMRel, SHRLV_PH_ENC, SHRLV_PH_DESC, ISA_DSPR2;
+def APPEND : APPEND_ENC, APPEND_DESC, ISA_DSPR2;
+def BALIGN : BALIGN_ENC, BALIGN_DESC, ISA_DSPR2;
+def PREPEND : DspMMRel, PREPEND_ENC, PREPEND_DESC, ISA_DSPR2;
// Pseudos.
let isPseudo = 1, isCodeGenOnly = 1 in {
@@ -1415,3 +1444,8 @@ let AddedComplexity = 20 in {
def : IndexedLoadPat<sextloadi16, LHX>;
def : IndexedLoadPat<load, LWX>;
}
+
+// Instruction alias.
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : DSPInstAlias<"wrdsp $rt", (WRDSP GPR32Opnd:$rt, 0x1F), 1>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 4faeb33..8313d90 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -355,9 +355,8 @@ void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
SE = MBB.succ_end(); SI != SE; ++SI)
if (*SI != &SuccBB)
- for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
- LE = (*SI)->livein_end(); LI != LE; ++LI)
- Uses.set(*LI);
+ for (const auto &LI : (*SI)->liveins())
+ Uses.set(LI.PhysReg);
}
bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
@@ -431,7 +430,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
(*MI.memoperands_begin())->getPseudoValue()) {
if (isa<FixedStackPseudoSourceValue>(PSV))
return false;
- return !PSV->isConstant(nullptr) && PSV != PseudoSourceValue::getStack();
+ return !PSV->isConstant(nullptr) && !PSV->isStack();
}
return true;
@@ -598,7 +597,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// Get instruction with delay slot.
MachineBasicBlock::instr_iterator DSI(I);
- if (InMicroMipsMode && TII->GetInstSizeInBytes(std::next(DSI)) == 2 &&
+ if (InMicroMipsMode && TII->GetInstSizeInBytes(&*std::next(DSI)) == 2 &&
DSI->isCall()) {
// If the instruction in the delay slot is 16 bits wide, change the opcode to
// the corresponding instruction with a short delay slot.
@@ -713,8 +712,9 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
if (DisableBackwardSearch)
return false;
- RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
- MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo());
+ auto *Fn = MBB.getParent();
+ RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
+ MemDefsUses MemDU(Fn->getDataLayout(), Fn->getFrameInfo());
ReverseIter Filler;
RegDU.init(*Slot);
@@ -763,6 +763,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
BB2BrMap BrMap;
std::unique_ptr<InspectMemInstr> IM;
Iter Filler;
+ auto *Fn = MBB.getParent();
// Iterate over SuccBB's predecessor list.
for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
@@ -772,15 +773,15 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
// Do not allow moving instructions which have unallocatable register operands
// across basic block boundaries.
- RegDU.setUnallocatableRegs(*MBB.getParent());
+ RegDU.setUnallocatableRegs(*Fn);
// Only allow moving loads from stack or constants if any of the SuccBB's
// predecessors have multiple successors.
if (HasMultipleSuccs) {
IM.reset(new LoadFromStackOrConst());
} else {
- const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
- IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI));
+ const MachineFrameInfo *MFI = Fn->getFrameInfo();
+ IM.reset(new MemDefsUses(Fn->getDataLayout(), MFI));
}
if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
@@ -800,12 +801,13 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
// Select the successor with the largest edge weight.
auto &Prob = getAnalysis<MachineBranchProbabilityInfo>();
- MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(),
- [&](const MachineBasicBlock *Dst0,
- const MachineBasicBlock *Dst1) {
- return Prob.getEdgeWeight(&B, Dst0) < Prob.getEdgeWeight(&B, Dst1);
- });
- return S->isLandingPad() ? nullptr : S;
+ MachineBasicBlock *S = *std::max_element(
+ B.succ_begin(), B.succ_end(),
+ [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) {
+ return Prob.getEdgeProbability(&B, Dst0) <
+ Prob.getEdgeProbability(&B, Dst1);
+ });
+ return S->isEHPad() ? nullptr : S;
}
std::pair<MipsInstrInfo::BranchType, MachineInstr *>
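
The selectSuccBB hunk above switches the comparison from getEdgeWeight to getEdgeProbability and rejects the chosen block when it is an EH pad (formerly a landing pad). A minimal standalone sketch of that selection, using plain STL types; Succ, selectSuccBB and the double probabilities below are illustrative stand-ins for MachineBasicBlock, MachineBranchProbabilityInfo and BranchProbability, not LLVM's API.

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for a machine basic block successor with its edge probability.
    struct Succ {
      std::string Name;
      double Prob;    // edge probability from the current block
      bool IsEHPad;   // corresponds to MachineBasicBlock::isEHPad()
    };

    // Pick the successor with the highest edge probability; refuse the choice
    // (return nullptr) if that successor is an exception-handling pad.
    const Succ *selectSuccBB(const std::vector<Succ> &Succs) {
      if (Succs.empty())
        return nullptr;
      auto It = std::max_element(
          Succs.begin(), Succs.end(),
          [](const Succ &A, const Succ &B) { return A.Prob < B.Prob; });
      return It->IsEHPad ? nullptr : &*It;
    }

    int main() {
      std::vector<Succ> Succs = {{"bb.1", 0.3, false}, {"bb.2", 0.7, false}};
      if (const Succ *S = selectSuccBB(Succs))
        std::cout << "chosen successor: " << S->Name << "\n"; // bb.2
    }
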
diff --git a/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td
new file mode 100644
index 0000000..11e191a
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td
@@ -0,0 +1,84 @@
+//===- MipsEVAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the MIPS EVA ASE instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class MipsEVAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+ PredicateControl, StdArch {
+ let DecoderNamespace = "Mips";
+ let EncodingPredicates = [HasStdEnc];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Field Values
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA
+def OPCODE6_LBE : OPCODE6<0b101100>;
+def OPCODE6_LBuE : OPCODE6<0b101000>;
+def OPCODE6_LHE : OPCODE6<0b101101>;
+def OPCODE6_LHuE : OPCODE6<0b101001>;
+def OPCODE6_LWE : OPCODE6<0b101111>;
+
+def OPCODE6_SBE : OPCODE6<0b011100>;
+def OPCODE6_SHE : OPCODE6<0b011101>;
+def OPCODE6_SWE : OPCODE6<0b011111>;
+
+// load/store left/right EVA
+def OPCODE6_LWLE : OPCODE6<0b011001>;
+def OPCODE6_LWRE : OPCODE6<0b011010>;
+def OPCODE6_SWLE : OPCODE6<0b100001>;
+def OPCODE6_SWRE : OPCODE6<0b100010>;
+
+// Load-linked EVA, Store-conditional EVA
+def OPCODE6_LLE : OPCODE6<0b101110>;
+def OPCODE6_SCE : OPCODE6<0b011110>;
+
+def OPCODE6_TLBINV : OPCODE6<0b000011>;
+def OPCODE6_TLBINVF : OPCODE6<0b000100>;
+
+def OPCODE6_CACHEE : OPCODE6<0b011011>;
+def OPCODE6_PREFE : OPCODE6<0b100011>;
+
+def OPGROUP_COP0 : OPGROUP<0b010000>;
+
+//===----------------------------------------------------------------------===//
+//
+// Encoding Formats
+//
+//===----------------------------------------------------------------------===//
+
+class SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6 Operation> : MipsEVAInst {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = base;
+ let Inst{20-16} = hint;
+ let Inst{15-7} = offset;
+ let Inst{6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
+
+class TLB_FM<OPCODE6 Operation> : MipsEVAInst {
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP0.Value;
+ let Inst{25} = 1; // CO
+ let Inst{24-6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
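
SPECIAL3_EVA_LOAD_STORE_FM above fixes the bit layout of every EVA load/store: the opgroup in Inst{31-26}, base in Inst{25-21}, rt/hint in Inst{20-16}, a 9-bit offset in Inst{15-7}, a zero bit at Inst{6} and the function code in Inst{5-0}. A hedged standalone sketch of that packing follows; encodeSpecial3EVA and its arguments are hypothetical, only the bit positions come from the class above, and OPGROUP_SPECIAL3's value (defined elsewhere in the tree) is deliberately passed in rather than reproduced.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Pack a SPECIAL3 EVA load/store the way the TableGen format class lays
    // the fields out.  Every value is a parameter; no opcode constants from
    // the .td files are hard-coded here.
    uint32_t encodeSpecial3EVA(uint32_t OpGroup6, uint32_t Base5,
                               uint32_t RtOrHint5, int32_t Offset9,
                               uint32_t Funct6) {
      assert(OpGroup6 < 64 && Base5 < 32 && RtOrHint5 < 32 && Funct6 < 64);
      assert(Offset9 >= -256 && Offset9 <= 255 && "offset must fit in simm9");
      uint32_t Inst = 0;
      Inst |= OpGroup6 << 26;                                  // Inst{31-26}
      Inst |= Base5 << 21;                                     // Inst{25-21}
      Inst |= RtOrHint5 << 16;                                 // Inst{20-16}
      Inst |= (static_cast<uint32_t>(Offset9) & 0x1FF) << 7;   // Inst{15-7}
      // Inst{6} stays 0.
      Inst |= Funct6;                                          // Inst{5-0}
      return Inst;
    }

    int main() {
      // 0b101100 is the LBE function code listed above; the opgroup argument
      // is a placeholder, not OPGROUP_SPECIAL3's real value.
      std::printf("0x%08x\n", static_cast<unsigned>(
                                  encodeSpecial3EVA(0, 4, 2, -8, 0b101100)));
    }
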
diff --git a/contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
new file mode 100644
index 0000000..36c9694
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -0,0 +1,192 @@
+//===- MipsEVAInstrInfo.td - EVA ASE instructions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips EVA ASE instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction encodings
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA encodings
+class LBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LBE>;
+class LBuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LBuE>;
+class LHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LHE>;
+class LHuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LHuE>;
+class LWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LWE>;
+
+class SBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SBE>;
+class SHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SHE>;
+class SWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SWE>;
+
+// load/store left/right EVA encodings
+class LWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LWLE>;
+class LWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LWRE>;
+class SWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SWLE>;
+class SWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SWRE>;
+
+// Load-linked EVA, Store-conditional EVA encodings
+class LLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_LLE>;
+class SCE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_SCE>;
+
+class TLBINV_ENC : TLB_FM<OPCODE6_TLBINV>;
+class TLBINVF_ENC : TLB_FM<OPCODE6_TLBINVF>;
+
+class CACHEE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_CACHEE>;
+class PREFE_ENC : SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6_PREFE>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction descriptions
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA descriptions
+class LOAD_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+ bit canFoldAsLoad = 1;
+ bit mayLoad = 1;
+}
+
+class LBE_DESC : LOAD_EVA_DESC_BASE<"lbe", GPR32Opnd>;
+class LBuE_DESC : LOAD_EVA_DESC_BASE<"lbue", GPR32Opnd>;
+class LHE_DESC : LOAD_EVA_DESC_BASE<"lhe", GPR32Opnd>;
+class LHuE_DESC : LOAD_EVA_DESC_BASE<"lhue", GPR32Opnd>;
+class LWE_DESC : LOAD_EVA_DESC_BASE<"lwe", GPR32Opnd>;
+
+class STORE_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ SDPatternOperator OpNode = null_frag> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+ bit mayStore = 1;
+}
+
+class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd>;
+class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd>;
+class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd>;
+
+// Load/Store Left/Right EVA descriptions
+class LOAD_LEFT_RIGHT_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr, GPROpnd:$src);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+ string Constraints = "$src = $rt";
+ bit canFoldAsLoad = 1;
+}
+
+class LWLE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwle", GPR32Opnd>;
+class LWRE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwre", GPR32Opnd>;
+
+class STORE_LEFT_RIGHT_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeMemEVA";
+}
+
+class SWLE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"swle", GPR32Opnd>;
+class SWRE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"swre", GPR32Opnd>;
+
+// Load-linked EVA, Store-conditional EVA descriptions
+class LLE_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayLoad = 1;
+ string DecoderMethod = "DecodeMemEVA";
+}
+
+class LLE_DESC : LLE_DESC_BASE<"lle", GPR32Opnd>;
+
+class SCE_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$dst);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayStore = 1;
+ string Constraints = "$rt = $dst";
+ string DecoderMethod = "DecodeMemEVA";
+}
+
+class SCE_DESC : SCE_DESC_BASE<"sce", GPR32Opnd>;
+
+class TLB_DESC_BASE<string instr_asm> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins);
+ string AsmString = instr_asm;
+ list<dag> Pattern = [];
+}
+
+class TLBINV_DESC : TLB_DESC_BASE<"tlbinv">;
+class TLBINVF_DESC : TLB_DESC_BASE<"tlbinvf">;
+
+class CACHEE_DESC_BASE<string instr_asm, Operand MemOpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
+ string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
+ list<dag> Pattern = [];
+ string DecoderMethod = "DecodeCacheeOp_CacheOpR6";
+}
+
+class CACHEE_DESC : CACHEE_DESC_BASE<"cachee", mem>;
+class PREFE_DESC : CACHEE_DESC_BASE<"prefe", mem>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction definitions
+//
+//===----------------------------------------------------------------------===//
+
+/// Load and Store EVA Instructions
+def LBE : LBE_ENC, LBE_DESC, INSN_EVA;
+def LBuE : LBuE_ENC, LBuE_DESC, INSN_EVA;
+def LHE : LHE_ENC, LHE_DESC, INSN_EVA;
+def LHuE : LHuE_ENC, LHuE_DESC, INSN_EVA;
+let AdditionalPredicates = [NotInMicroMips] in {
+def LWE : LWE_ENC, LWE_DESC, INSN_EVA;
+}
+def SBE : SBE_ENC, SBE_DESC, INSN_EVA;
+def SHE : SHE_ENC, SHE_DESC, INSN_EVA;
+let AdditionalPredicates = [NotInMicroMips] in {
+def SWE : SWE_ENC, SWE_DESC, INSN_EVA;
+}
+
+/// load/store left/right EVA
+let AdditionalPredicates = [NotInMicroMips] in {
+def LWLE : LWLE_ENC, LWLE_DESC, INSN_EVA_NOT_32R6_64R6;
+def LWRE : LWRE_ENC, LWRE_DESC, INSN_EVA_NOT_32R6_64R6;
+def SWLE : SWLE_ENC, SWLE_DESC, INSN_EVA_NOT_32R6_64R6;
+def SWRE : SWRE_ENC, SWRE_DESC, INSN_EVA_NOT_32R6_64R6;
+}
+
+/// Load-linked EVA, Store-conditional EVA
+let AdditionalPredicates = [NotInMicroMips] in {
+def LLE : LLE_ENC, LLE_DESC, INSN_EVA;
+def SCE : SCE_ENC, SCE_DESC, INSN_EVA;
+}
+
+def TLBINV : TLBINV_ENC, TLBINV_DESC, INSN_EVA;
+def TLBINVF : TLBINVF_ENC, TLBINVF_DESC, INSN_EVA;
+
+def CACHEE : CACHEE_ENC, CACHEE_DESC, INSN_EVA;
+def PREFE : PREFE_ENC, PREFE_DESC, INSN_EVA;
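
All of the EVA memory instructions above address memory through mem_simm9, i.e. a base register plus a signed 9-bit offset matching the 9-bit field of the format class. Below is a small illustrative range check of the kind an assembler or selector needs for such operands; fitsInSignedBits is a hypothetical helper, not the predicate method (isMemWithSimmOffset<9>) that the real parser hooks up later in this patch.

    #include <cstdint>
    #include <cstdio>

    // True if Value fits in a signed immediate of the given width, e.g.
    // Bits == 9 for the EVA load/store offset field (-256 .. 255).
    bool fitsInSignedBits(int64_t Value, unsigned Bits) {
      const int64_t Lo = -(int64_t(1) << (Bits - 1));
      const int64_t Hi = (int64_t(1) << (Bits - 1)) - 1;
      return Value >= Lo && Value <= Hi;
    }

    int main() {
      std::printf("%d %d %d\n",
                  fitsInSignedBits(255, 9),   // 1: largest valid offset
                  fitsInSignedBits(-256, 9),  // 1: smallest valid offset
                  fitsInSignedBits(256, 9));  // 0: needs another addressing form
    }
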
diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
index 5152a07..e9eaf81 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -192,10 +192,10 @@ public:
TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) {
MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
Context = &funcInfo.Fn->getContext();
+ bool ISASupported = !Subtarget->hasMips32r6() && Subtarget->hasMips32();
TargetSupported =
- ((TM.getRelocationModel() == Reloc::PIC_) &&
- ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) &&
- (static_cast<const MipsTargetMachine &>(TM).getABI().IsO32())));
+ ISASupported && (TM.getRelocationModel() == Reloc::PIC_) &&
+ (static_cast<const MipsTargetMachine &>(TM).getABI().IsO32());
UnsupportedFPMode = Subtarget->isFP64bit();
}
@@ -236,32 +236,36 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
std::swap(LHS, RHS);
unsigned Opc;
- if (ISDOpc == ISD::AND) {
+ switch (ISDOpc) {
+ case ISD::AND:
Opc = Mips::AND;
- } else if (ISDOpc == ISD::OR) {
+ break;
+ case ISD::OR:
Opc = Mips::OR;
- } else if (ISDOpc == ISD::XOR) {
+ break;
+ case ISD::XOR:
Opc = Mips::XOR;
- } else
+ break;
+ default:
llvm_unreachable("unexpected opcode");
+ }
unsigned LHSReg = getRegForValue(LHS);
- unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
- if (!ResultReg)
- return 0;
-
- unsigned RHSReg;
if (!LHSReg)
return 0;
+ unsigned RHSReg;
if (const auto *C = dyn_cast<ConstantInt>(RHS))
RHSReg = materializeInt(C, MVT::i32);
else
RHSReg = getRegForValue(RHS);
-
if (!RHSReg)
return 0;
+ unsigned ResultReg = createResultReg(&Mips::GPR32RegClass);
+ if (!ResultReg)
+ return 0;
+
emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg);
return ResultReg;
}
@@ -747,7 +751,7 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Offset = Addr.getOffset();
MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(FI)
@@ -798,7 +802,7 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Offset = Addr.getOffset();
MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad,
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg)
@@ -912,8 +916,7 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
.addReg(CondReg)
.addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
return false;
@@ -1057,22 +1060,16 @@ bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) {
// entirely within FPRs.
unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
unsigned TempReg = createResultReg(&Mips::FGR32RegClass);
- unsigned Opc;
-
- if (SrcVT == MVT::f32)
- Opc = Mips::TRUNC_W_S;
- else
- Opc = Mips::TRUNC_W_D32;
+ unsigned Opc = (SrcVT == MVT::f32) ? Mips::TRUNC_W_S : Mips::TRUNC_W_D32;
// Generate the convert.
emitInst(Opc, TempReg).addReg(SrcReg);
-
emitInst(Mips::MFC1, DestReg).addReg(TempReg);
updateValueMap(I, DestReg);
return true;
}
-//
+
bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
SmallVectorImpl<MVT> &OutVTs,
unsigned &NumBytes) {
@@ -1196,7 +1193,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(Addr.getOffset()),
+ MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
(void)(MMO);
// if (!emitStore(ArgVT, ArgReg, Addr, MMO))
@@ -1607,19 +1604,23 @@ bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg) {
+ int64_t Imm;
+
switch (SrcVT.SimpleTy) {
default:
return false;
case MVT::i1:
- emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(1);
+ Imm = 1;
break;
case MVT::i8:
- emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xff);
+ Imm = 0xff;
break;
case MVT::i16:
- emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xffff);
+ Imm = 0xffff;
break;
}
+
+ emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(Imm);
return true;
}
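
The emitIntZExt hunk above folds three separate ANDi emissions into one by first selecting a mask for the source type. A standalone sketch of that selection; SrcTy and zeroExtend are stand-ins for MVT::SimpleValueType and the fast-isel helper, not LLVM code.

    #include <cstdint>
    #include <cstdio>

    enum class SrcTy { i1, i8, i16 };   // stand-in for the MVT cases above

    // Zero-extend by masking, mirroring the single ANDi the fast-isel path
    // now emits after choosing the mask.
    bool zeroExtend(SrcTy Ty, uint32_t Src, uint32_t &Dst) {
      int64_t Imm;
      switch (Ty) {
      case SrcTy::i1:  Imm = 1;      break;
      case SrcTy::i8:  Imm = 0xff;   break;
      case SrcTy::i16: Imm = 0xffff; break;
      default:         return false; // unsupported source type
      }
      Dst = Src & static_cast<uint32_t>(Imm);
      return true;
    }

    int main() {
      uint32_t Out = 0;
      if (zeroExtend(SrcTy::i8, 0x1234, Out))
        std::printf("0x%x\n", Out);   // prints 0x34
    }
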
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index fab2fdf..6756c17 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -117,6 +117,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::GPRel: return "MipsISD::GPRel";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
+ case MipsISD::ERet: return "MipsISD::ERet";
case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
case MipsISD::FPCmp: return "MipsISD::FPCmp";
@@ -390,10 +391,10 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ if (!Subtarget.isGP64bit()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+ }
setInsertFencesForAtomic(true);
@@ -437,9 +438,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP);
- setExceptionPointerRegister(ABI.IsN64() ? Mips::A0_64 : Mips::A0);
- setExceptionSelectorRegister(ABI.IsN64() ? Mips::A1_64 : Mips::A1);
-
MaxStoresPerMemcpy = 16;
isMicroMips = Subtarget.inMicroMipsMode();
@@ -836,6 +834,14 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return SDValue();
}
+bool MipsTargetLowering::isCheapToSpeculateCttz() const {
+ return Subtarget.hasMips32();
+}
+
+bool MipsTargetLowering::isCheapToSpeculateCtlz() const {
+ return Subtarget.hasMips32();
+}
+
void
MipsTargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
@@ -1092,8 +1098,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
@@ -1204,8 +1209,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loopMBB);
MF->insert(It, sinkMBB);
MF->insert(It, exitMBB);
@@ -1330,15 +1334,20 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
DebugLoc DL = MI->getDebugLoc();
unsigned LL, SC, ZERO, BNE, BEQ;
- if (Size == 4) {
- LL = isMicroMips ? Mips::LL_MM : Mips::LL;
- SC = isMicroMips ? Mips::SC_MM : Mips::SC;
+ if (Size == 4) {
+ if (isMicroMips) {
+ LL = Mips::LL_MM;
+ SC = Mips::SC_MM;
+ } else {
+ LL = Subtarget.hasMips32r6() ? Mips::LL_R6 : Mips::LL;
+ SC = Subtarget.hasMips32r6() ? Mips::SC_R6 : Mips::SC;
+ }
ZERO = Mips::ZERO;
BNE = Mips::BNE;
BEQ = Mips::BEQ;
} else {
- LL = Mips::LLD;
- SC = Mips::SCD;
+ LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
+ SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
ZERO = Mips::ZERO_64;
BNE = Mips::BNE64;
BEQ = Mips::BEQ64;
@@ -1356,8 +1365,7 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
@@ -1440,8 +1448,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, sinkMBB);
@@ -1586,9 +1593,10 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
- MachinePointerInfo::getJumpTable(), MemVT, false, false,
- false, 0);
+ Addr =
+ DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(DAG.getMachineFunction()),
+ MemVT, false, false, false, 0);
Chain = Addr.getValue(1);
if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || ABI.IsN64()) {
@@ -1690,14 +1698,15 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
if (LargeGOT)
- return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16,
- MipsII::MO_GOT_LO16, DAG.getEntryNode(),
- MachinePointerInfo::getGOT());
+ return getAddrGlobalLargeGOT(
+ N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16,
+ DAG.getEntryNode(),
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
- return getAddrGlobal(N, SDLoc(N), Ty, DAG,
- (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP
- : MipsII::MO_GOT16,
- DAG.getEntryNode(), MachinePointerInfo::getGOT());
+ return getAddrGlobal(
+ N, SDLoc(N), Ty, DAG,
+ (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16,
+ DAG.getEntryNode(), MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
@@ -1719,6 +1728,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
// Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -1813,7 +1825,8 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
- if (TLOF->IsConstantInSmallSection(N->getConstVal(), getTargetMachine()))
+ if (TLOF->IsConstantInSmallSection(DAG.getDataLayout(), N->getConstVal(),
+ getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
@@ -2946,8 +2959,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1);
- Function::const_arg_iterator FuncArg =
- DAG.getMachineFunction().getFunction()->arg_begin();
+ const Function *Func = DAG.getMachineFunction().getFunction();
+ Function::const_arg_iterator FuncArg = Func->arg_begin();
+
+ if (Func->hasFnAttribute("interrupt") && !Func->arg_empty())
+ report_fatal_error(
+ "Functions with the interrupt attribute cannot have arguments!");
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg);
MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
@@ -3019,7 +3036,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// We ought to be able to use LocVT directly but O32 sets it to i32
// when allocating floating point values to integer registers.
// This shouldn't influence how we load the value into registers unless
- // we are targetting softfloat.
+ // we are targeting softfloat.
if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat())
LocVT = VA.getValVT();
}
@@ -3033,9 +3050,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ SDValue ArgValue = DAG.getLoad(
+ LocVT, DL, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, false, 0);
OutChains.push_back(ArgValue.getValue(1));
ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG);
@@ -3098,8 +3116,20 @@ MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
}
SDValue
-MipsTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool IsVarArg,
+MipsTargetLowering::LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
+ SDLoc DL, SelectionDAG &DAG) const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ MipsFI->setISR();
+
+ return DAG.getNode(MipsISD::ERet, DL, MVT::Other, RetOps);
+}
+
+SDValue
+MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
SDLoc DL, SelectionDAG &DAG) const {
@@ -3192,7 +3222,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- // Return on Mips is always a "jr $ra"
+ // ISRs must use "eret".
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt"))
+ return LowerInterruptReturn(RetOps, DL, DAG);
+
+ // Standard return on Mips is a "jr $ra"
return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps);
}
@@ -3300,7 +3334,7 @@ static std::pair<bool, bool> parsePhysicalReg(StringRef C, StringRef &Prefix,
// Search for the first numeric character.
StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1;
- I = std::find_if(B, E, std::ptr_fun(isdigit));
+ I = std::find_if(B, E, isdigit);
Prefix = StringRef(B, I - B);
@@ -3669,7 +3703,7 @@ void MipsTargetLowering::passByValArg(
unsigned NumRegs = LastReg - FirstReg;
if (NumRegs) {
- const ArrayRef<MCPhysReg> ArgRegs = ABI.GetByValArgRegs();
+ ArrayRef<MCPhysReg> ArgRegs = ABI.GetByValArgRegs();
bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes);
unsigned I = 0;
@@ -3755,7 +3789,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
SDValue Chain, SDLoc DL,
SelectionDAG &DAG,
CCState &State) const {
- const ArrayRef<MCPhysReg> ArgRegs = ABI.GetVarArgRegs();
+ ArrayRef<MCPhysReg> ArgRegs = ABI.GetVarArgRegs();
unsigned Idx = State.getFirstUnallocated(ArgRegs);
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
@@ -3812,7 +3846,7 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size,
if (State->getCallingConv() != CallingConv::Fast) {
unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes();
- const ArrayRef<MCPhysReg> IntArgRegs = ABI.GetByValArgRegs();
+ ArrayRef<MCPhysReg> IntArgRegs = ABI.GetByValArgRegs();
// FIXME: The O32 case actually describes no shadow registers.
const MCPhysReg *ShadowRegs =
ABI.IsO32() ? IntArgRegs.data() : Mips64DPRegs;
@@ -3860,8 +3894,7 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
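
The emitAtomicCmpSwap hunk above now picks the revision-6 encodings of LL/SC (and LLD/SCD for 64-bit accesses) when the subtarget supports them. A standalone sketch of that opcode selection; the Op enumerators and pickLLSC are placeholders for Mips::LL, Mips::LL_R6, Mips::SC and friends, and a single HasR6 flag stands in for the separate hasMips32r6()/hasMips64r6() queries.

    #include <cstdio>

    // Placeholder opcode names; the real code uses Mips::LL, Mips::LL_MM,
    // Mips::LL_R6, Mips::LLD, Mips::LLD_R6 and the matching SC variants.
    enum class Op { LL, LL_MM, LL_R6, LLD, LLD_R6,
                    SC, SC_MM, SC_R6, SCD, SCD_R6 };

    struct LLSC { Op LL, SC; };

    // Mirror of the selection: 32-bit vs 64-bit access, microMIPS mode, and
    // whether the revision-6 encodings are available.
    LLSC pickLLSC(unsigned Size, bool IsMicroMips, bool HasR6) {
      if (Size == 4) {
        if (IsMicroMips)
          return {Op::LL_MM, Op::SC_MM};
        return HasR6 ? LLSC{Op::LL_R6, Op::SC_R6} : LLSC{Op::LL, Op::SC};
      }
      // 64-bit accesses use the doubleword variants.
      return HasR6 ? LLSC{Op::LLD_R6, Op::SCD_R6} : LLSC{Op::LLD, Op::SCD};
    }

    int main() {
      LLSC P = pickLLSC(4, /*IsMicroMips=*/false, /*HasR6=*/true);
      std::printf("%d %d\n", static_cast<int>(P.LL), static_cast<int>(P.SC));
    }
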
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index b3d861d..b33e125 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -67,6 +67,10 @@ namespace llvm {
// Return
Ret,
+ // Interrupt, exception, error trap Return
+ ERet,
+
+ // Software Exception Return.
EH_RETURN,
// Node used to extract integer from accumulator.
@@ -231,6 +235,9 @@ namespace llvm {
return MVT::i32;
}
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+
void LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -258,17 +265,25 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const override;
- struct LTStr {
- bool operator()(const char *S1, const char *S2) const {
- return strcmp(S1, S2) < 0;
- }
- };
-
void HandleByVal(CCState *, unsigned &, unsigned) const override;
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return ABI.IsN64() ? Mips::A0_64 : Mips::A0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return ABI.IsN64() ? Mips::A1_64 : Mips::A1;
+ }
+
/// Returns true if a cast between SrcAS and DestAS is a noop.
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
// Mips doesn't have any special address spaces so we just reserve
@@ -290,9 +305,10 @@ namespace llvm {
unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(N, Ty, DAG, GOTFlag));
- SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
- MachinePointerInfo::getGOT(), false, false,
- false, 0);
+ SDValue Load =
+ DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
unsigned LoFlag = IsN32OrN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty,
getTargetNode(N, Ty, DAG, LoFlag));
@@ -487,6 +503,9 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, SDLoc DL,
+ SelectionDAG &DAG) const;
+
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
// Inline asm support
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index cb91225..377260f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -136,7 +136,7 @@ multiclass ABSS_M<string opstr, InstrItinClass Itin,
multiclass ROUND_M<string opstr, InstrItinClass Itin> {
def _D32 : MMRel, ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, Itin>, FGR_32;
- def _D64 : ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>, FGR_64 {
+ def _D64 : StdMMR6Rel, ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>, FGR_64 {
let DecoderNamespace = "Mips64";
}
}
@@ -267,24 +267,25 @@ defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
-def ROUND_W_S : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
+def ROUND_W_S : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
ABSS_FM<0xc, 16>, ISA_MIPS2;
-def TRUNC_W_S : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
+defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2;
+def TRUNC_W_S : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
ABSS_FM<0xd, 16>, ISA_MIPS2;
-def CEIL_W_S : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
+def CEIL_W_S : MMRel, StdMMR6Rel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
ABSS_FM<0xe, 16>, ISA_MIPS2;
-def FLOOR_W_S : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
+def FLOOR_W_S : MMRel, StdMMR6Rel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
ABSS_FM<0xf, 16>, ISA_MIPS2;
def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x24, 16>;
-defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2;
defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2;
defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2;
defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2;
defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>;
let DecoderNamespace = "Mips64" in {
+ let AdditionalPredicates = [NotInMicroMips] in {
def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>,
ABSS_FM<0x8, 16>, FGR_64;
def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>,
@@ -301,14 +302,17 @@ let DecoderNamespace = "Mips64" in {
ABSS_FM<0xb, 16>, FGR_64;
def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>,
ABSS_FM<0xb, 17>, FGR_64;
+ }
}
def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x20, 20>;
-def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x25, 16>, INSN_MIPS3_32R2;
-def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x25, 17>, INSN_MIPS3_32R2;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>,
+ ABSS_FM<0x25, 16>, INSN_MIPS3_32R2;
+ def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x25, 17>, INSN_MIPS3_32R2;
+}
def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>,
ABSS_FM<0x20, 17>, FGR_32;
@@ -320,8 +324,10 @@ def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>,
let DecoderNamespace = "Mips64" in {
def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>,
ABSS_FM<0x20, 17>, FGR_64;
- def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x20, 21>, FGR_64;
+ let AdditionalPredicates = [NotInMicroMips] in {
+ def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
+ ABSS_FM<0x20, 21>, FGR_64;
+ }
def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x21, 20>, FGR_64;
def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>,
@@ -345,8 +351,8 @@ def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>,
defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>;
defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>;
-def FSQRT_S : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>,
- ABSS_FM<0x4, 16>, ISA_MIPS2;
+def FSQRT_S : MMRel, StdMMR6Rel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd,
+ II_SQRT_S, fsqrt>, ABSS_FM<0x4, 16>, ISA_MIPS2;
defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2;
// The odd-numbered registers are only referenced when doing loads,
@@ -503,13 +509,13 @@ let AdditionalPredicates = [NoNaNsFPMath],
def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
-def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>,
+def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>,
BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6;
-def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, IIBranch, MIPS_BRANCH_F, 0>,
+def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, II_BC1FL, MIPS_BRANCH_F, 0>,
BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6;
-def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>,
+def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>,
BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6;
-def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, IIBranch, MIPS_BRANCH_T, 0>,
+def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>,
BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6;
/// Floating Point Compare
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
index 5f4fcc3..45baf27 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -132,7 +132,7 @@ class PseudoSE<dag outs, dag ins, list<dag> pattern,
// These are aliases that require C++ handling to convert to the target
// instruction, while InstAliases can be handled directly by tblgen.
class MipsAsmPseudoInst<dag outs, dag ins, string asmstr>:
- MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo> {
+ MipsInst<outs, ins, asmstr, [], IIPseudo, Pseudo>, PredicateControl {
let isPseudo = 1;
let Pattern = [];
}
@@ -644,16 +644,16 @@ class BRK_FM<bits<6> funct> : StdArch
// Exception return format <Cop0|1|0|funct>
//===----------------------------------------------------------------------===//
-class ER_FM<bits<6> funct> : StdArch
+class ER_FM<bits<6> funct, bit LLBit> : StdArch
{
bits<32> Inst;
let Inst{31-26} = 0x10;
let Inst{25} = 1;
- let Inst{24-6} = 0;
+ let Inst{24-7} = 0;
+ let Inst{6} = LLBit;
let Inst{5-0} = funct;
}
-
//===----------------------------------------------------------------------===//
// Enable/disable interrupt instruction format <Cop0|MFMC0|rt|12|0|sc|0|0>
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index bb23cc0..b1d6950 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -60,8 +60,8 @@ MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
- return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag,
- MFI.getObjectSize(FI), Align);
+ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+ Flag, MFI.getObjectSize(FI), Align);
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index ab98c90..d9fb8c8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -77,6 +77,9 @@ def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>;
def MipsRet : SDNode<"MipsISD::Ret", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def MipsERet : SDNode<"MipsISD::ERet", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>;
+
// These are target-independent nodes, but have target-specific formats.
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart,
[SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
@@ -157,7 +160,7 @@ def HasMips3 : Predicate<"Subtarget->hasMips3()">,
def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">,
AssemblerPredicate<"FeatureMips4_32">;
def NotMips4_32 : Predicate<"!Subtarget->hasMips4_32()">,
- AssemblerPredicate<"FeatureMips4_32">;
+ AssemblerPredicate<"!FeatureMips4_32">;
def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">,
AssemblerPredicate<"FeatureMips4_32r2">;
def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">,
@@ -166,6 +169,8 @@ def HasMips32 : Predicate<"Subtarget->hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
+def HasMips32r5 : Predicate<"Subtarget->hasMips32r5()">,
+ AssemblerPredicate<"FeatureMips32r5">;
def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">,
AssemblerPredicate<"FeatureMips32r6">;
def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">,
@@ -176,6 +181,8 @@ def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">,
AssemblerPredicate<"!FeatureGP64Bit">;
def HasMips64 : Predicate<"Subtarget->hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
+def NotMips64 : Predicate<"!Subtarget->hasMips64()">,
+ AssemblerPredicate<"!FeatureMips64">;
def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">,
AssemblerPredicate<"FeatureMips64r2">;
def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">,
@@ -184,6 +191,8 @@ def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
AssemblerPredicate<"!FeatureMips64r6">;
def HasMicroMips32r6 : Predicate<"Subtarget->inMicroMips32r6Mode()">,
AssemblerPredicate<"FeatureMicroMips,FeatureMips32r6">;
+def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">,
+ AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">;
def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
@@ -201,6 +210,12 @@ def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">,
def IsLE : Predicate<"Subtarget->isLittle()">;
def IsBE : Predicate<"!Subtarget->isLittle()">;
def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
+def UseTCCInDIV : AssemblerPredicate<"FeatureUseTCCInDIV">;
+def HasEVA : Predicate<"Subtarget->hasEVA()">,
+ AssemblerPredicate<"FeatureEVA,FeatureMips32r2">;
+def HasMSA : Predicate<"Subtarget->hasMSA()">,
+ AssemblerPredicate<"FeatureMSA">;
+
//===----------------------------------------------------------------------===//
// Mips GPR size adjectives.
@@ -242,6 +257,7 @@ class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
class ISA_MIPS32R2_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [HasMips32r2, NotMips32r6, NotMips64r6];
}
+class ISA_MIPS32R5 { list<Predicate> InsnPredicates = [HasMips32r5]; }
class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; }
class ISA_MIPS64_NOT_64R6 {
list<Predicate> InsnPredicates = [HasMips64, NotMips64r6];
@@ -249,9 +265,21 @@ class ISA_MIPS64_NOT_64R6 {
class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
+class ISA_MICROMIPS { list<Predicate> InsnPredicates = [InMicroMips]; }
class ISA_MICROMIPS32R6 {
list<Predicate> InsnPredicates = [HasMicroMips32r6];
}
+class ISA_MICROMIPS64R6 {
+ list<Predicate> InsnPredicates = [HasMicroMips64r6];
+}
+class ISA_MICROMIPS32_NOT_MIPS32R6 {
+ list<Predicate> InsnPredicates = [InMicroMips, NotMips32r6];
+}
+
+class INSN_EVA { list<Predicate> InsnPredicates = [HasEVA]; }
+class INSN_EVA_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [NotMips32r6, NotMips64r6, HasEVA];
+}
// The portions of MIPS-III that were also added to MIPS32
class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
@@ -283,6 +311,28 @@ class INSN_MIPS5_32R2_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [HasMips5_32r2, NotMips32r6, NotMips64r6];
}
+class ASE_CNMIPS {
+ list<Predicate> InsnPredicates = [HasCnMips];
+}
+
+class ASE_MSA {
+ list<Predicate> InsnPredicates = [HasMSA];
+}
+
+class ASE_MSA_NOT_MSA64 {
+ list<Predicate> InsnPredicates = [HasMSA, NotMips64];
+}
+
+class ASE_MSA64 {
+ list<Predicate> InsnPredicates = [HasMSA, HasMips64];
+}
+
+// Class used for separating microMIPSr6 and microMIPS (r3) instructions.
+// It can be used only on instructions that don't inherit PredicateControl.
+class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl {
+ let InsnPredicates = [InMicroMips, NotMips32r6, NotMips64r6];
+}
+
//===----------------------------------------------------------------------===//
class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
@@ -335,6 +385,81 @@ include "MipsInstrFormats.td"
// Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
+class ConstantSImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+ : AsmOperandClass {
+ let Name = "ConstantSImm" # Bits;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isConstantSImm<" # Bits # ">";
+ let SuperClasses = Supers;
+ let DiagnosticType = "SImm" # Bits;
+}
+
+class ConstantUImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
+ int Offset = 0> : AsmOperandClass {
+ let Name = "ConstantUImm" # Bits # "_" # Offset;
+ let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">";
+ let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">";
+ let SuperClasses = Supers;
+ let DiagnosticType = "UImm" # Bits # "_" # Offset;
+}
+
+def ConstantUImm10AsmOperandClass
+ : ConstantUImmAsmOperandClass<10, []>;
+def ConstantUImm8AsmOperandClass
+ : ConstantUImmAsmOperandClass<8, [ConstantUImm10AsmOperandClass]>;
+def ConstantUImm7AsmOperandClass
+ : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass]>;
+def ConstantUImm6AsmOperandClass
+ : ConstantUImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>;
+def ConstantSImm6AsmOperandClass
+ : ConstantSImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>;
+def ConstantUImm5Plus1AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 1>;
+def ConstantUImm5Plus32AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32>;
+def ConstantUImm5Plus33AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 33>;
+def ConstantUImm5Plus32NormalizeAsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32> {
+ let Name = "ConstantUImm5_32_Norm";
+ // We must also subtract 32 when we render the operand.
+ let RenderMethod = "addConstantUImmOperands<5, 32, -32>";
+}
+def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass {
+ let Name = "UImm5Lsl2";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isScaledUImm<5, 2>";
+ let SuperClasses = [ConstantUImm6AsmOperandClass];
+ let DiagnosticType = "UImm5_Lsl2";
+}
+def ConstantUImm5ReportUImm6AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]> {
+ let Name = "ConstantUImm5_0_Report_UImm6";
+ let DiagnosticType = "UImm5_0_Report_UImm6";
+}
+def ConstantUImm5AsmOperandClass
+ : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]>;
+def ConstantUImm4AsmOperandClass
+ : ConstantUImmAsmOperandClass<
+ 4, [ConstantUImm5AsmOperandClass,
+ ConstantUImm5Plus32AsmOperandClass,
+ ConstantUImm5Plus32NormalizeAsmOperandClass]>;
+def ConstantUImm3AsmOperandClass
+ : ConstantUImmAsmOperandClass<3, [ConstantUImm4AsmOperandClass]>;
+def ConstantUImm2Plus1AsmOperandClass
+ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass], 1>;
+def ConstantUImm2AsmOperandClass
+ : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>;
+def ConstantUImm1AsmOperandClass
+ : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>;
+def ConstantImmzAsmOperandClass : AsmOperandClass {
+ let Name = "ConstantImmz";
+ let RenderMethod = "addConstantUImmOperands<1>";
+ let PredicateMethod = "isConstantImmz";
+ let SuperClasses = [ConstantUImm1AsmOperandClass];
+ let DiagnosticType = "Immz";
+}
+
def MipsJumpTargetAsmOperand : AsmOperandClass {
let Name = "JumpTarget";
let ParserMethod = "parseJumpTarget";
@@ -360,6 +485,10 @@ def calltarget : Operand<iPTR> {
def imm64: Operand<i64>;
+def simm6 : Operand<i32> {
+ let ParserMatchClass = ConstantSImm6AsmOperandClass;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
def simm9 : Operand<i32>;
def simm10 : Operand<i32>;
def simm11 : Operand<i32>;
@@ -380,23 +509,12 @@ def simm18_lsl3 : Operand<i32> {
let ParserMatchClass = MipsJumpTargetAsmOperand;
}
-def simm20 : Operand<i32> {
-}
+def simm20 : Operand<i32>;
+def simm32 : Operand<i32>;
def uimm20 : Operand<i32> {
}
-def MipsUImm10AsmOperand : AsmOperandClass {
- let Name = "UImm10";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseImm";
- let PredicateMethod = "isUImm<10>";
-}
-
-def uimm10 : Operand<i32> {
- let ParserMatchClass = MipsUImm10AsmOperand;
-}
-
def simm16_64 : Operand<i64> {
let DecoderMethod = "DecodeSimm16";
}
@@ -404,23 +522,71 @@ def simm16_64 : Operand<i64> {
// Zero
def uimmz : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantImmzAsmOperandClass;
+}
+
+// Unsigned Operands
+foreach I = {1, 2, 3, 4, 5, 6, 7, 8, 10} in
+ def uimm # I : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass");
+ }
+
+def uimm2_plus1 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>";
+ let DecoderMethod = "DecodeUImmWithOffset<2, 1>";
+ let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass;
}
-// Unsigned Operand
-def uimm2 : Operand<i32> {
+def uimm5_plus1 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>";
+ let DecoderMethod = "DecodeUImmWithOffset<5, 1>";
+ let ParserMatchClass = ConstantUImm5Plus1AsmOperandClass;
}
-def uimm3 : Operand<i32> {
+def uimm5_plus32 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5Plus32AsmOperandClass;
}
-def uimm5 : Operand<i32> {
+def uimm5_plus33 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>";
+ let DecoderMethod = "DecodeUImmWithOffset<5, 1>";
+ let ParserMatchClass = ConstantUImm5Plus33AsmOperandClass;
}
-def uimm6 : Operand<i32> {
+def uimm5_plus32_normalize : Operand<i32> {
let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass;
+}
+
+def uimm5_lsl2 : Operand<OtherVT> {
+ let EncoderMethod = "getUImm5Lsl2Encoding";
+ let DecoderMethod = "DecodeUImm5lsl2";
+ let ParserMatchClass = ConstantUImm5Lsl2AsmOperandClass;
+}
+
+def uimm5_plus32_normalize_64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass;
+}
+
+foreach I = {5} in
+ def uimm # I # _64 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("ConstantUImm" # I # "AsmOperandClass");
+ }
+
+// Like uimm5_64 but reports a less confusing error for the range 32-63 when
+// an instruction alias permits those values.
+def uimm5_64_report_uimm6 : Operand<i64> {
+ let PrintMethod = "printUnsignedImm";
+ let ParserMatchClass = ConstantUImm5ReportUImm6AsmOperandClass;
}
def uimm16 : Operand<i32> {
@@ -435,6 +601,22 @@ def MipsMemAsmOperand : AsmOperandClass {
let ParserMethod = "parseMemOperand";
}
+def MipsMemSimm9AsmOperand : AsmOperandClass {
+ let Name = "MemOffsetSimm9";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithSimmOffset<9>";
+}
+
+def MipsMemSimm9GPRAsmOperand : AsmOperandClass {
+ let Name = "MemOffsetSimm9GPR";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithSimmOffsetGPR<9>";
+}
+
def MipsMemSimm11AsmOperand : AsmOperandClass {
let Name = "MemOffsetSimm11";
let SuperClasses = [MipsMemAsmOperand];
@@ -485,6 +667,13 @@ def mem_msa : mem_generic {
def mem_simm9 : mem_generic {
let MIOperandInfo = (ops ptr_rc, simm9);
let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemSimm9AsmOperand;
+}
+
+def mem_simm9gpr : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, simm9);
+ let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemSimm9GPRAsmOperand;
}
def mem_simm11 : mem_generic {
@@ -512,12 +701,6 @@ def PtrRC : Operand<iPTR> {
let ParserMatchClass = GPR32AsmOperand;
}
-// size operand of ext instruction
-def size_ext : Operand<i32> {
- let EncoderMethod = "getSizeExtEncoding";
- let DecoderMethod = "DecodeExtSize";
-}
-
// size operand of ins instruction
def size_ins : Operand<i32> {
let EncoderMethod = "getSizeInsEncoding";
@@ -657,7 +840,7 @@ class shift_rotate_reg<string opstr, RegisterOperand RO, InstrItinClass itin,
[(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs))], itin, FrmR,
opstr>;
-// Load Upper Imediate
+// Load Upper Immediate
class LoadUpper<string opstr, RegisterOperand RO, Operand Imm>:
InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"),
[], II_LUI, FrmI, opstr>, IsAsCheapAsAMove {
@@ -675,14 +858,19 @@ class Load<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
let mayLoad = 1;
}
-class Store<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+class StoreMemory<string opstr, DAGOperand RO, DAGOperand MO,
+ SDPatternOperator OpNode = null_frag,
InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
- InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
+ InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
}
+class Store<string opstr, DAGOperand RO, SDPatternOperator OpNode = null_frag,
+ InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+ StoreMemory<opstr, RO, mem, OpNode, Itin, Addr>;
+
// Load/Store Left/Right
let canFoldAsLoad = 1 in
class LoadLeftRight<string opstr, SDNode OpNode, RegisterOperand RO,
@@ -740,7 +928,7 @@ class CBranch<string opstr, DAGOperand opnd, PatFrag cond_op,
RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset),
!strconcat(opstr, "\t$rs, $rt, $offset"),
- [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch,
+ [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], II_BCC,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
@@ -752,7 +940,7 @@ class CBranchZero<string opstr, DAGOperand opnd, PatFrag cond_op,
RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"),
- [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch,
+ [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], II_BCCZ,
FrmI, opstr> {
let isBranch = 1;
let isTerminator = 1;
@@ -778,7 +966,7 @@ class SetCC_I<string opstr, PatFrag cond_op, Operand Od, PatLeaf imm_type,
class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
SDPatternOperator targetoperator, string bopstr> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(operator targetoperator:$target)], IIBranch, FrmJ, bopstr> {
+ [(operator targetoperator:$target)], II_J, FrmJ, bopstr> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -788,7 +976,7 @@ class JumpFJ<DAGOperand opnd, string opstr, SDPatternOperator operator,
// Unconditional branch
class UncondBranch<Instruction BEQInst> :
- PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], IIBranch>,
+ PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], II_B>,
PseudoInstExpansion<(BEQInst ZERO, ZERO, brtarget:$offset)> {
let isBranch = 1;
let isTerminator = 1;
@@ -802,7 +990,7 @@ class UncondBranch<Instruction BEQInst> :
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
class JumpFR<string opstr, RegisterOperand RO,
SDPatternOperator operator = null_frag>:
- InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch,
+ InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], II_JR,
FrmR, opstr>;
// Indirect branch
@@ -815,23 +1003,23 @@ class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> {
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLink<string opstr, DAGOperand opnd> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(MipsJmpLink imm:$target)], IIBranch, FrmJ, opstr> {
+ [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTarget";
}
class JumpLinkRegPseudo<RegisterOperand RO, Instruction JALRInst,
Register RetReg, RegisterOperand ResRO = RO>:
- PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], IIBranch>,
+ PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>,
PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>;
class JumpLinkReg<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [], IIBranch, FrmR>;
+ [], II_JALR, FrmR, opstr>;
class BGEZAL_FT<string opstr, DAGOperand opnd,
RegisterOperand RO, bit DelaySlot = 1> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
- !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr> {
+ !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZAL, FrmI, opstr> {
let hasDelaySlot = DelaySlot;
}
@@ -840,17 +1028,17 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
hasExtraSrcRegAllocReq = 1, Defs = [AT] in {
class TailCall<Instruction JumpInst> :
- PseudoSE<(outs), (ins calltarget:$target), [], IIBranch>,
+ PseudoSE<(outs), (ins calltarget:$target), [], II_J>,
PseudoInstExpansion<(JumpInst jmptarget:$target)>;
class TailCallReg<RegisterOperand RO, Instruction JRInst,
RegisterOperand ResRO = RO> :
- PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], IIBranch>,
+ PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
PseudoInstExpansion<(JRInst ResRO:$rs)>;
}
class BAL_BR_Pseudo<Instruction RealInst> :
- PseudoSE<(outs), (ins brtarget:$offset), [], IIBranch>,
+ PseudoSE<(outs), (ins brtarget:$offset), [], II_BCCZAL>,
PseudoInstExpansion<(RealInst ZERO, brtarget:$offset)> {
let isBranch = 1;
let isTerminator = 1;
@@ -997,9 +1185,10 @@ class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO,
[(set RO:$rd, (sext_inreg RO:$rt, vt))], itin, FrmR, opstr>;
// Subword Swap
-class SubwordSwap<string opstr, RegisterOperand RO>:
- InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
- NoItinerary, FrmR, opstr> {
+class SubwordSwap<string opstr, RegisterOperand RO,
+ InstrItinClass itin = NoItinerary>:
+ InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], itin,
+ FrmR, opstr> {
let hasSideEffects = 0;
}
@@ -1010,8 +1199,8 @@ class ReadHardware<RegisterOperand CPURegOperand, RegisterOperand RO> :
// Ext and Ins
class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
- SDPatternOperator Op = null_frag>:
- InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
+ Operand SizeOpnd, SDPatternOperator Op = null_frag> :
+ InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
[(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT,
FrmR, opstr>, ISA_MIPS32R2;
@@ -1074,6 +1263,9 @@ class TrapBase<Instruction RealInst>
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in
def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>;
+let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, hasSideEffects=1 in
+def ERet : PseudoSE<(outs), (ins), [(MipsERet)]>;
+
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt),
[(callseq_start timm:$amt)]>;
@@ -1215,10 +1407,11 @@ def LH : Load<"lh", GPR32Opnd, sextloadi16, II_LH, addrDefault>, MMRel,
LW_FM<0x21>;
def LHu : Load<"lhu", GPR32Opnd, zextloadi16, II_LHU>, MMRel, LW_FM<0x25>;
let AdditionalPredicates = [NotInMicroMips] in {
-def LW : Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel,
+def LW : StdMMR6Rel, Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel,
LW_FM<0x23>;
}
-def SB : Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel, LW_FM<0x28>;
+def SB : StdMMR6Rel, Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel,
+ LW_FM<0x28>;
def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>;
let AdditionalPredicates = [NotInMicroMips] in {
def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>;
@@ -1259,15 +1452,17 @@ let DecoderNamespace = "COP3_" in {
}
}
-def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
-def SYNCI : MMRel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
+def SYNC : MMRel, StdMMR6Rel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
+def SYNCI : MMRel, StdMMR6Rel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
-def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2;
-def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2;
-def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2;
-def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2;
-def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2;
-def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2;
+ def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2;
+ def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2;
+ def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2;
+ def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2;
+ def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2;
+}
def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>,
ISA_MIPS2_NOT_32R6_64R6;
@@ -1290,14 +1485,15 @@ def TRAP : TrapBase<BREAK>;
def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
let AdditionalPredicates = [NotInMicroMips] in {
-def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
+ def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18, 0x0>, INSN_MIPS3_32;
+ def ERETNC : MMRel, ER_FT<"eretnc">, ER_FM<0x18, 0x1>, ISA_MIPS32R5;
+ def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f, 0x0>, ISA_MIPS32;
}
-def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
let AdditionalPredicates = [NotInMicroMips] in {
-def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+ def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+ def DI : MMRel, StdMMR6Rel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2;
}
-def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2;
let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
AdditionalPredicates = [NotInMicroMips] in {
@@ -1359,7 +1555,8 @@ def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
class PseudoIndirectBranchBase<RegisterOperand RO> :
- MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> {
+ MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)],
+ II_IndirectBranchPseudo> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
@@ -1369,12 +1566,12 @@ class PseudoIndirectBranchBase<RegisterOperand RO> :
def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
-// Return instructions are matched as a RetRA instruction, then ar expanded
+// Return instructions are matched as a RetRA instruction, then are expanded
// into PseudoReturn/PseudoReturn64 after register allocation. Finally,
// MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the
// ISA.
class PseudoReturnBase<RegisterOperand RO> : MipsPseudo<(outs), (ins RO:$rs),
- [], IIBranch> {
+ [], II_ReturnPseudo> {
let isTerminator = 1;
let isBarrier = 1;
let hasDelaySlot = 1;
@@ -1441,8 +1638,11 @@ def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>,
def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>,
ISA_MIPS32_NOT_32R6_64R6;
-/// Word Swap Bytes Within Halfwords
-def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2;
+let AdditionalPredicates = [NotInMicroMips] in {
+ /// Word Swap Bytes Within Halfwords
+ def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>, SEB_FM<2, 0x20>,
+ ISA_MIPS32R2;
+}
/// No operation.
def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
@@ -1485,10 +1685,12 @@ def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, II_DIV,
0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, II_DIVU,
0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
-
+let AdditionalPredicates = [NotInMicroMips] in {
def RDHWR : MMRel, ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
-
-def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
+}
+// TODO: Add '0 < pos+size <= 32' constraint check to ext instruction
+def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, MipsExt>,
+ EXT_FM<0>;
def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
/// Move Control Registers From/To CPU Registers
@@ -1499,9 +1701,9 @@ def MTC2 : MTC3OP<"mtc2", COP2Opnd, GPR32Opnd>, MFC3OP_FM<0x12, 4>;
class Barrier<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
FrmOther, asmstr>;
-def SSNOP : MMRel, Barrier<"ssnop">, BARRIER_FM<1>;
+def SSNOP : MMRel, StdMMR6Rel, Barrier<"ssnop">, BARRIER_FM<1>;
def EHB : MMRel, Barrier<"ehb">, BARRIER_FM<3>;
-def PAUSE : MMRel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
+def PAUSE : MMRel, StdMMR6Rel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
// JR_HB and JALR_HB are defined here using the new style naming
// scheme because some of this code is shared with Mips32r6InstrInfo.td
@@ -1562,11 +1764,60 @@ def CACHE : MMRel, CacheOp<"cache", mem>, CACHEOP_FM<0b101111>,
def PREF : MMRel, CacheOp<"pref", mem>, CACHEOP_FM<0b110011>,
INSN_MIPS3_32_NOT_32R6_64R6;
+def ROL : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "rol\t$rs, $rt, $rd">;
+def ROLImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "rol\t$rs, $rt, $imm">;
+def : MipsInstAlias<"rol $rd, $rs",
+ (ROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"rol $rd, $imm",
+ (ROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>;
+
+def ROR : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "ror\t$rs, $rt, $rd">;
+def RORImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "ror\t$rs, $rt, $imm">;
+def : MipsInstAlias<"ror $rd, $rs",
+ (ROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"ror $rd, $imm",
+ (RORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>;
+
+def DROL : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "drol\t$rs, $rt, $rd">, ISA_MIPS64;
+def DROLImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "drol\t$rs, $rt, $imm">, ISA_MIPS64;
+def : MipsInstAlias<"drol $rd, $rs",
+ (DROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64;
+def : MipsInstAlias<"drol $rd, $imm",
+ (DROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64;
+
+def DROR : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd),
+ "dror\t$rs, $rt, $rd">, ISA_MIPS64;
+def DRORImm : MipsAsmPseudoInst<(outs),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "dror\t$rs, $rt, $imm">, ISA_MIPS64;
+def : MipsInstAlias<"dror $rd, $rs",
+ (DROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64;
+def : MipsInstAlias<"dror $rd, $imm",
+ (DRORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64;
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
def : MipsInstAlias<"move $dst, $src",
- (ADDu GPR32Opnd:$dst, GPR32Opnd:$src,ZERO), 1>,
+ (OR GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>,
+ GPR_32 {
+ let AdditionalPredicates = [NotInMicroMips];
+}
+def : MipsInstAlias<"move $dst, $src",
+ (ADDu GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>,
GPR_32 {
let AdditionalPredicates = [NotInMicroMips];
}
@@ -1630,27 +1881,27 @@ def : MipsInstAlias<"beqz $rs,$offset",
def : MipsInstAlias<"beqzl $rs,$offset",
(BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
-
+
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
let AdditionalPredicates = [NotInMicroMips] in {
-def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
-}
-def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
-
-def : MipsInstAlias<"teq $rs, $rt",
- (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tge $rs, $rt",
- (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tgeu $rs, $rt",
- (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tlt $rs, $rt",
- (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tltu $rs, $rt",
- (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-def : MipsInstAlias<"tne $rs, $rt",
- (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
-
+ def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2;
+ def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2;
+}
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsInstAlias<"teq $rs, $rt",
+ (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tge $rs, $rt",
+ (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tgeu $rs, $rt",
+ (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tlt $rs, $rt",
+ (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tltu $rs, $rt",
+ (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+ def : MipsInstAlias<"tne $rs, $rt",
+ (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2;
+}
def : MipsInstAlias<"sll $rd, $rt, $rs",
(SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"sub, $rd, $rs, $imm",
@@ -1678,7 +1929,7 @@ def : MipsInstAlias<"sync",
class LoadImmediate32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>;
+def LoadImm32 : LoadImmediate32<"li", simm32, GPR32Opnd>;
class LoadAddressFromReg32<string instr_asm, Operand MemOpnd,
RegisterOperand RO> :
@@ -1689,13 +1940,16 @@ def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>;
class LoadAddressFromImm32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>;
+def LoadAddrImm32 : LoadAddressFromImm32<"la", simm32, GPR32Opnd>;
def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
"jal\t$rd, $rs"> ;
def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
"jal\t$rs"> ;
+def NORImm : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+ "nor\t$rs, $rt, $imm"> ;
+
let hasDelaySlot = 1 in {
def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
(ins imm64:$imm64, brtarget:$offset),
@@ -1718,12 +1972,62 @@ def BLTU : CondBranchPseudo<"bltu">;
def BLEU : CondBranchPseudo<"bleu">;
def BGEU : CondBranchPseudo<"bgeu">;
def BGTU : CondBranchPseudo<"bgtu">;
+def BLTL : CondBranchPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEL : CondBranchPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEL : CondBranchPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTL : CondBranchPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTUL: CondBranchPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEUL: CondBranchPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEUL: CondBranchPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTUL: CondBranchPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
+class CondBranchImmPseudo<string instr_asm> :
+ MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, imm64:$imm, brtarget:$offset),
+ !strconcat(instr_asm, "\t$rs, $imm, $offset")>;
+
+def BLTImmMacro : CondBranchImmPseudo<"blt">;
+def BLEImmMacro : CondBranchImmPseudo<"ble">;
+def BGEImmMacro : CondBranchImmPseudo<"bge">;
+def BGTImmMacro : CondBranchImmPseudo<"bgt">;
+def BLTUImmMacro : CondBranchImmPseudo<"bltu">;
+def BLEUImmMacro : CondBranchImmPseudo<"bleu">;
+def BGEUImmMacro : CondBranchImmPseudo<"bgeu">;
+def BGTUImmMacro : CondBranchImmPseudo<"bgtu">;
+def BLTLImmMacro : CondBranchImmPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLELImmMacro : CondBranchImmPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGELImmMacro : CondBranchImmPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTLImmMacro : CondBranchImmPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTULImmMacro : CondBranchImmPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEULImmMacro : CondBranchImmPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEULImmMacro : CondBranchImmPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTULImmMacro : CondBranchImmPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
+// FIXME: Predicates are removed because instructions are matched regardless of
+// predicates, as PredicateControl was not in the hierarchy. This was done to
+// emit more precise error messages from the expansion function. Once the
+// tablegen-erated errors are made better, this needs to be fixed and the
+// predicates need to be restored.
+
+def SDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "div\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6;
+
+def UDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "divu\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6;
+
+def DSDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "ddiv\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6;
+
+def DUDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "ddivu\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6;
+
+def Ulh : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+ "ulh\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
- "ulhu\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+ "ulhu\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
- "ulw\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6;
+ "ulw\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -1939,6 +2243,16 @@ let AddedComplexity = 40 in {
}
}
+// Atomic load patterns.
+def : MipsPat<(atomic_load_8 addr:$a), (LB addr:$a)>;
+def : MipsPat<(atomic_load_16 addr:$a), (LH addr:$a)>;
+def : MipsPat<(atomic_load_32 addr:$a), (LW addr:$a)>;
+
+// Atomic store patterns.
+def : MipsPat<(atomic_store_8 addr:$a, GPR32:$v), (SB GPR32:$v, addr:$a)>;
+def : MipsPat<(atomic_store_16 addr:$a, GPR32:$v), (SH GPR32:$v, addr:$a)>;
+def : MipsPat<(atomic_store_32 addr:$a, GPR32:$v), (SW GPR32:$v, addr:$a)>;
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
@@ -1964,6 +2278,10 @@ include "MipsDSPInstrInfo.td"
include "MipsMSAInstrFormats.td"
include "MipsMSAInstrInfo.td"
+// EVA
+include "MipsEVAInstrFormats.td"
+include "MipsEVAInstrInfo.td"
+
// Micromips
include "MicroMipsInstrFormats.td"
include "MicroMipsInstrInfo.td"
@@ -1972,3 +2290,11 @@ include "MicroMipsInstrFPU.td"
// Micromips r6
include "MicroMips32r6InstrFormats.td"
include "MicroMips32r6InstrInfo.td"
+
+// Micromips64 r6
+include "MicroMips64r6InstrFormats.td"
+include "MicroMips64r6InstrInfo.td"
+
+// Micromips DSP
+include "MicroMipsDSPInstrFormats.td"
+include "MicroMipsDSPInstrInfo.td"
diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
index 90f8cc0..49fb99a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
@@ -148,7 +148,7 @@ void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) {
// Insert NewMBB and fix control flow.
MachineBasicBlock *Tgt = getTargetMBB(*FirstBr);
NewMBB->transferSuccessors(MBB);
- NewMBB->removeSuccessor(Tgt);
+ NewMBB->removeSuccessor(Tgt, true);
MBB->addSuccessor(NewMBB);
MBB->addSuccessor(Tgt);
MF->insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
@@ -161,7 +161,7 @@ void MipsLongBranch::initMBBInfo() {
// Split the MBBs if they have two branches. Each basic block should have at
// most one branch after this loop is executed.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;)
- splitMBB(I++);
+ splitMBB(&*I++);
MF->RenumberBlocks();
MBBInfos.clear();
@@ -262,8 +262,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
static_cast<const MipsInstrInfo *>(Subtarget.getInstrInfo());
MF->insert(FallThroughMBB, LongBrMBB);
- MBB->removeSuccessor(TgtMBB);
- MBB->addSuccessor(LongBrMBB);
+ MBB->replaceSuccessor(TgtMBB, LongBrMBB);
if (IsPIC) {
MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB);
@@ -434,7 +433,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB));
} else
// Change branch destination and reverse condition.
- replaceBranch(*MBB, I.Br, DL, FallThroughMBB);
+ replaceBranch(*MBB, I.Br, DL, &*FallThroughMBB);
}
static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
index bff2d0f..7d25ea5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -7,18 +7,12 @@
//
//===----------------------------------------------------------------------===//
-def HasMSA : Predicate<"Subtarget->hasMSA()">,
- AssemblerPredicate<"FeatureMSA">;
-
-class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
- let Predicates = [HasMSA];
+class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+ PredicateControl, ASE_MSA {
+ let EncodingPredicates = [HasStdEnc];
let Inst{31-26} = 0b011110;
}
-class MSA64Inst : MSAInst {
- let Predicates = [HasMSA, HasMips64];
-}
-
class MSACBranch : MSAInst {
let Inst{31-26} = 0b010001;
}
@@ -27,10 +21,6 @@ class MSASpecial : MSAInst {
let Inst{31-26} = 0b000000;
}
-class MSA64Special : MSA64Inst {
- let Inst{31-26} = 0b000000;
-}
-
class MSAPseudo<dag outs, dag ins, list<dag> pattern,
InstrItinClass itin = IIPseudo>:
MipsPseudo<outs, ins, pattern, itin> {
@@ -100,7 +90,7 @@ class MSA_2R_FILL_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
let Inst{5-0} = minor;
}
-class MSA_2R_FILL_D_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSA64Inst {
+class MSA_2R_FILL_D_FMT<bits<8> major, bits<2> df, bits<6> minor>: MSAInst {
bits<5> rs;
bits<5> wd;
@@ -293,7 +283,7 @@ class MSA_ELM_COPY_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
let Inst{5-0} = minor;
}
-class MSA_ELM_COPY_D_FMT<bits<4> major, bits<6> minor>: MSA64Inst {
+class MSA_ELM_COPY_D_FMT<bits<4> major, bits<6> minor>: MSAInst {
bits<4> n;
bits<5> ws;
bits<5> rd;
@@ -345,7 +335,7 @@ class MSA_ELM_INSERT_W_FMT<bits<4> major, bits<6> minor>: MSAInst {
let Inst{5-0} = minor;
}
-class MSA_ELM_INSERT_D_FMT<bits<4> major, bits<6> minor>: MSA64Inst {
+class MSA_ELM_INSERT_D_FMT<bits<4> major, bits<6> minor>: MSAInst {
bits<6> n;
bits<5> rs;
bits<5> wd;
@@ -450,7 +440,7 @@ class SPECIAL_LSA_FMT<bits<6> minor>: MSASpecial {
let Inst{5-0} = minor;
}
-class SPECIAL_DLSA_FMT<bits<6> minor>: MSA64Special {
+class SPECIAL_DLSA_FMT<bits<6> minor>: MSASpecial {
bits<5> rs;
bits<5> rt;
bits<5> rd;
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 970e98e..eacfcec 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -63,30 +63,13 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT",
def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>;
+def immZExt1Ptr : ImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>;
+def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
def immZExt6Ptr : ImmLeaf<iPTR, [{return isUInt<6>(Imm);}]>;
// Operands
-// The immediate of an LSA instruction needs special handling
-// as the encoded value should be subtracted by one.
-def uimm2LSAAsmOperand : AsmOperandClass {
- let Name = "LSAImm";
- let ParserMethod = "parseLSAImm";
- let RenderMethod = "addImmOperands";
-}
-
-def LSAImm : Operand<i32> {
- let PrintMethod = "printUnsignedImm";
- let EncoderMethod = "getLSAImmEncoding";
- let DecoderMethod = "DecodeLSAImm";
- let ParserMatchClass = uimm2LSAAsmOperand;
-}
-
-def uimm4 : Operand<i32> {
- let PrintMethod = "printUnsignedImm8";
-}
-
def uimm4_ptr : Operand<iPTR> {
let PrintMethod = "printUnsignedImm8";
}
@@ -95,10 +78,6 @@ def uimm6_ptr : Operand<iPTR> {
let PrintMethod = "printUnsignedImm8";
}
-def uimm8 : Operand<i32> {
- let PrintMethod = "printUnsignedImm8";
-}
-
def simm5 : Operand<i32>;
def vsplat_uimm1 : Operand<vAny> {
@@ -639,7 +618,6 @@ class COPY_S_D_ENC : MSA_ELM_COPY_D_FMT<0b0010, 0b011001>;
class COPY_U_B_ENC : MSA_ELM_COPY_B_FMT<0b0011, 0b011001>;
class COPY_U_H_ENC : MSA_ELM_COPY_H_FMT<0b0011, 0b011001>;
class COPY_U_W_ENC : MSA_ELM_COPY_W_FMT<0b0011, 0b011001>;
-class COPY_U_D_ENC : MSA_ELM_COPY_D_FMT<0b0011, 0b011001>;
class CTCMSA_ENC : MSA_ELM_CTCMSA_FMT<0b0000111110, 0b011001>;
@@ -1195,47 +1173,14 @@ class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
InstrItinClass Itinerary = itin;
}
-// This class is deprecated and will be removed soon.
-class MSA_BIT_B_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
- dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm3:$m);
- string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))];
- InstrItinClass Itinerary = itin;
-}
-
-// This class is deprecated and will be removed soon.
-class MSA_BIT_H_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
- dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm4:$m);
- string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))];
- InstrItinClass Itinerary = itin;
-}
-
-// This class is deprecated and will be removed soon.
-class MSA_BIT_W_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
- dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm5:$m);
- string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))];
- InstrItinClass Itinerary = itin;
-}
-
-// This class is deprecated and will be removed soon.
-class MSA_BIT_D_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
- InstrItinClass itin = NoItinerary> {
+class MSA_BIT_X_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+ Operand ImmOp, ImmLeaf Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
+ InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWS:$ws, uimm6:$m);
+ dag InOperandList = (ins ROWS:$ws, ImmOp:$m);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
- list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))];
+ list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
InstrItinClass Itinerary = itin;
}
@@ -1291,13 +1236,14 @@ class MSA_COPY_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
}
class MSA_ELM_SLD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ RegisterOperand ROWD, RegisterOperand ROWS,
+ Operand ImmOp, ImmLeaf Imm,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, uimm4:$n);
+ dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ImmOp:$n);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]");
list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws,
- immZExt4:$n))];
+ Imm:$n))];
string Constraints = "$wd = $wd_in";
InstrItinClass Itinerary = itin;
}
@@ -1479,7 +1425,7 @@ class MSA_CBRANCH_DESC_BASE<string instr_asm, RegisterOperand ROWD> {
dag InOperandList = (ins ROWD:$wt, brtarget:$offset);
string AsmString = !strconcat(instr_asm, "\t$wt, $offset");
list<dag> Pattern = [];
- InstrItinClass Itinerary = IIBranch;
+ InstrItinClass Itinerary = NoItinerary;
bit isBranch = 1;
bit isTerminator = 1;
bit hasDelaySlot = 1;
@@ -1519,13 +1465,14 @@ class MSA_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
}
class MSA_INSVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+ Operand ImmOp, ImmLeaf Imm, RegisterOperand ROWD,
+ RegisterOperand ROWS = ROWD,
InstrItinClass itin = NoItinerary> {
dag OutOperandList = (outs ROWD:$wd);
- dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws, uimmz:$n2);
+ dag InOperandList = (ins ROWD:$wd_in, ImmOp:$n, ROWS:$ws, uimmz:$n2);
string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[$n2]");
list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in,
- immZExt6:$n,
+ Imm:$n,
ROWS:$ws,
immz:$n2))];
InstrItinClass Itinerary = itin;
@@ -1934,8 +1881,6 @@ class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16,
GPR32Opnd, MSA128HOpnd>;
class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32,
GPR32Opnd, MSA128WOpnd>;
-class COPY_U_D_DESC : MSA_COPY_DESC_BASE<"copy_u.d", vextract_zext_i64, v2i64,
- GPR64Opnd, MSA128DOpnd>;
class COPY_FW_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE<vector_extract, v4f32, FGR32,
MSA128W>;
@@ -2346,13 +2291,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC :
class INSERT_FD_VIDX64_PSEUDO_DESC :
MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>;
-class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8,
+class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4,
MSA128BOpnd>;
-class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16,
+class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3,
MSA128HOpnd>;
-class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32,
+class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2,
MSA128WOpnd>;
-class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64,
+class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1,
MSA128DOpnd>;
class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -2381,7 +2326,7 @@ class LSA_DESC_BASE<string instr_asm, RegisterOperand RORD,
RegisterOperand RORS = RORD, RegisterOperand RORT = RORD,
InstrItinClass itin = NoItinerary > {
dag OutOperandList = (outs RORD:$rd);
- dag InOperandList = (ins RORS:$rs, RORT:$rt, LSAImm:$sa);
+ dag InOperandList = (ins RORS:$rs, RORT:$rt, uimm2_plus1:$sa);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $sa");
list<dag> Pattern = [(set RORD:$rd, (add RORT:$rt,
(shl RORS:$rs,
@@ -2561,23 +2506,23 @@ class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128HOpnd>;
class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
-class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b,
- MSA128BOpnd>;
-class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h,
- MSA128HOpnd>;
-class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w,
- MSA128WOpnd>;
-class SAT_S_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d,
- MSA128DOpnd>;
-
-class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b,
- MSA128BOpnd>;
-class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h,
- MSA128HOpnd>;
-class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w,
- MSA128WOpnd>;
-class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d,
- MSA128DOpnd>;
+class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6,
+ immZExt6, MSA128DOpnd>;
+
+class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6,
+ immZExt6, MSA128DOpnd>;
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
@@ -2589,13 +2534,17 @@ class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>;
class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b,
- MSA128BOpnd>;
+ MSA128BOpnd, MSA128BOpnd, uimm4,
+ immZExt4>;
class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h,
- MSA128HOpnd>;
+ MSA128HOpnd, MSA128HOpnd, uimm3,
+ immZExt3>;
class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w,
- MSA128WOpnd>;
+ MSA128WOpnd, MSA128WOpnd, uimm2,
+ immZExt2>;
class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d,
- MSA128DOpnd>;
+ MSA128DOpnd, MSA128DOpnd, uimm1,
+ immZExt1>;
class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
@@ -2648,14 +2597,14 @@ class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>;
class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
-class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b,
- MSA128BOpnd>;
-class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h,
- MSA128HOpnd>;
-class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w,
- MSA128WOpnd>;
-class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d,
- MSA128DOpnd>;
+class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6,
+ immZExt6, MSA128DOpnd>;
class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
@@ -2676,14 +2625,14 @@ class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>;
class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
-class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b,
- MSA128BOpnd>;
-class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h,
- MSA128HOpnd>;
-class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w,
- MSA128WOpnd>;
-class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d,
- MSA128DOpnd>;
+class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3,
+ immZExt3, MSA128BOpnd>;
+class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4,
+ immZExt4, MSA128HOpnd>;
+class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5,
+ immZExt5, MSA128WOpnd>;
+class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6,
+ immZExt6, MSA128DOpnd>;
class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
ValueType TyNode, RegisterOperand ROWD,
@@ -2991,12 +2940,11 @@ def CLTI_U_D : CLTI_U_D_ENC, CLTI_U_D_DESC;
def COPY_S_B : COPY_S_B_ENC, COPY_S_B_DESC;
def COPY_S_H : COPY_S_H_ENC, COPY_S_H_DESC;
def COPY_S_W : COPY_S_W_ENC, COPY_S_W_DESC;
-def COPY_S_D : COPY_S_D_ENC, COPY_S_D_DESC;
+def COPY_S_D : COPY_S_D_ENC, COPY_S_D_DESC, ASE_MSA64;
def COPY_U_B : COPY_U_B_ENC, COPY_U_B_DESC;
def COPY_U_H : COPY_U_H_ENC, COPY_U_H_DESC;
-def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC;
-def COPY_U_D : COPY_U_D_ENC, COPY_U_D_DESC;
+def COPY_U_W : COPY_U_W_ENC, COPY_U_W_DESC, ASE_MSA64;
def COPY_FW_PSEUDO : COPY_FW_PSEUDO_DESC;
def COPY_FD_PSEUDO : COPY_FD_PSEUDO_DESC;
@@ -3108,7 +3056,7 @@ def FFQR_D : FFQR_D_ENC, FFQR_D_DESC;
def FILL_B : FILL_B_ENC, FILL_B_DESC;
def FILL_H : FILL_H_ENC, FILL_H_DESC;
def FILL_W : FILL_W_ENC, FILL_W_DESC;
-def FILL_D : FILL_D_ENC, FILL_D_DESC;
+def FILL_D : FILL_D_ENC, FILL_D_DESC, ASE_MSA64;
def FILL_FW_PSEUDO : FILL_FW_PSEUDO_DESC;
def FILL_FD_PSEUDO : FILL_FD_PSEUDO_DESC;
@@ -3238,7 +3186,7 @@ def ILVR_D : ILVR_D_ENC, ILVR_D_DESC;
def INSERT_B : INSERT_B_ENC, INSERT_B_DESC;
def INSERT_H : INSERT_H_ENC, INSERT_H_DESC;
def INSERT_W : INSERT_W_ENC, INSERT_W_DESC;
-def INSERT_D : INSERT_D_ENC, INSERT_D_DESC;
+def INSERT_D : INSERT_D_ENC, INSERT_D_DESC, ASE_MSA64;
// INSERT_FW_PSEUDO defined after INSVE_W
// INSERT_FD_PSEUDO defined after INSVE_D
@@ -3280,7 +3228,7 @@ def LDI_W : LDI_W_ENC, LDI_W_DESC;
def LDI_D : LDI_D_ENC, LDI_D_DESC;
def LSA : LSA_ENC, LSA_DESC;
-def DLSA : DLSA_ENC, DLSA_DESC;
+def DLSA : DLSA_ENC, DLSA_DESC, ASE_MSA64;
def MADD_Q_H : MADD_Q_H_ENC, MADD_Q_H_DESC;
def MADD_Q_W : MADD_Q_W_ENC, MADD_Q_W_DESC;
@@ -3787,6 +3735,28 @@ def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
MSA128B, NoItinerary>;
+// Vector extraction with fixed index.
+//
+// Extracting 32-bit values on MSA32 should always use COPY_S_W rather than
+// COPY_U_W, even for the zero-extended case. This is because our forward
+// compatibility strategy is to consider registers to be infinitely
+// sign-extended so that a MIPS64 can execute MIPS32 code without getting
+// different register values.
+def : MSAPat<(vextract_zext_i32 (v4i32 MSA128W:$ws), immZExt2Ptr:$idx),
+ (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64;
+def : MSAPat<(vextract_zext_i32 (v4f32 MSA128W:$ws), immZExt2Ptr:$idx),
+ (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64;
+
+// Extracting 64-bit values on MSA64 should always use COPY_S_D rather than
+// COPY_U_D, even for the zero-extended case. This is because our forward
+// compatibility strategy is to consider registers to be infinitely
+// sign-extended so that a hypothetical MIPS128 would be able to execute MIPS64
+// code without getting different register values.
+def : MSAPat<(vextract_zext_i64 (v2i64 MSA128D:$ws), immZExt1Ptr:$idx),
+ (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64;
+def : MSAPat<(vextract_zext_i64 (v2f64 MSA128D:$ws), immZExt1Ptr:$idx),
+ (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64;
+
// Vector extraction with variable index
def : MSAPat<(i32 (vextract_sext_i8 v16i8:$ws, i32:$idx)),
(SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws,
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index 0d1ee04..c7d2738 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -24,42 +24,6 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
-// class MipsCallEntry.
-MipsCallEntry::MipsCallEntry(StringRef N) {
-#ifndef NDEBUG
- Name = N;
- Val = nullptr;
-#endif
-}
-
-MipsCallEntry::MipsCallEntry(const GlobalValue *V) {
-#ifndef NDEBUG
- Val = V;
-#endif
-}
-
-bool MipsCallEntry::isConstant(const MachineFrameInfo *) const {
- return false;
-}
-
-bool MipsCallEntry::isAliased(const MachineFrameInfo *) const {
- return false;
-}
-
-bool MipsCallEntry::mayAlias(const MachineFrameInfo *) const {
- return false;
-}
-
-void MipsCallEntry::printCustom(raw_ostream &O) const {
- O << "MipsCallEntry: ";
-#ifndef NDEBUG
- if (Val)
- O << Val->getName();
- else
- O << Name;
-#endif
-}
-
MipsFunctionInfo::~MipsFunctionInfo() {}
bool MipsFunctionInfo::globalBaseRegSet() const {
@@ -111,27 +75,32 @@ void MipsFunctionInfo::createEhDataRegsFI() {
}
}
+void MipsFunctionInfo::createISRRegFI() {
+ // ISRs require spill slots for Status & ErrorPC Coprocessor 0 registers.
+ // The current implementation only supports Mips32r2+, not Mips64rX. Status
+ // is always 32 bits and ErrorPC is 32 or 64 bits depending on the
+ // architecture; however, Mips32r2+ is the supported architecture.
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+
+ for (int I = 0; I < 2; ++I)
+ ISRDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(
+ RC->getSize(), RC->getAlignment(), false);
+}
+
bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
|| FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
- std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name];
-
- if (!E)
- E = llvm::make_unique<MipsCallEntry>(Name);
-
- return MachinePointerInfo(E.get());
+bool MipsFunctionInfo::isISRRegFI(int FI) const {
+ return IsISR && (FI == ISRDataRegFI[0] || FI == ISRDataRegFI[1]);
+}
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const char *ES) {
+ return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES));
}
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
- std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val];
-
- if (!E)
- E = llvm::make_unique<MipsCallEntry>(Val);
-
- return MachinePointerInfo(E.get());
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) {
+ return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV));
}
int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
index 32436ef..a2f6ee0 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.h
@@ -15,12 +15,10 @@
#define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
#include "Mips16HardFloatInfo.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -30,31 +28,13 @@
namespace llvm {
-/// \brief A class derived from PseudoSourceValue that represents a GOT entry
-/// resolved by lazy-binding.
-class MipsCallEntry : public PseudoSourceValue {
-public:
- explicit MipsCallEntry(StringRef N);
- explicit MipsCallEntry(const GlobalValue *V);
- bool isConstant(const MachineFrameInfo *) const override;
- bool isAliased(const MachineFrameInfo *) const override;
- bool mayAlias(const MachineFrameInfo *) const override;
-
-private:
- void printCustom(raw_ostream &O) const override;
-#ifndef NDEBUG
- std::string Name;
- const GlobalValue *Val;
-#endif
-};
-
/// MipsFunctionInfo - This class is derived from MachineFunction private
/// Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
public:
MipsFunctionInfo(MachineFunction &MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0),
- VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false),
+ VarArgsFrameIndex(0), CallsEhReturn(false), IsISR(false), SaveS2(false),
MoveF64ViaSpillFI(-1) {}
~MipsFunctionInfo();
@@ -86,13 +66,21 @@ public:
int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; }
bool isEhDataRegFI(int FI) const;
- /// \brief Create a MachinePointerInfo that has a MipsCallEntr object
- /// representing a GOT entry for an external function.
- MachinePointerInfo callPtrInfo(StringRef Name);
+ /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue
+ /// object representing a GOT entry for an external function.
+ MachinePointerInfo callPtrInfo(const char *ES);
+
+ // Functions with the "interrupt" attribute require special prologues,
+ // epilogues and additional spill slots.
+ bool isISR() const { return IsISR; }
+ void setISR() { IsISR = true; }
+ void createISRRegFI();
+ int getISRRegFI(unsigned Reg) const { return ISRDataRegFI[Reg]; }
+ bool isISRRegFI(int FI) const;
- /// \brief Create a MachinePointerInfo that has a MipsCallEntr object
+ /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object
/// representing a GOT entry for a global function.
- MachinePointerInfo callPtrInfo(const GlobalValue *Val);
+ MachinePointerInfo callPtrInfo(const GlobalValue *GV);
void setSaveS2() { SaveS2 = true; }
bool hasSaveS2() const { return SaveS2; }
@@ -136,17 +124,18 @@ private:
/// Frame objects for spilling eh data registers.
int EhDataRegFI[4];
+ /// ISR - Whether the function is an Interrupt Service Routine.
+ bool IsISR;
+
+ /// Frame objects for spilling C0_STATUS, C0_EPC
+ int ISRDataRegFI[2];
+
// saveS2
bool SaveS2;
/// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the
/// O32 FPXX ABI is enabled. -1 is used to denote invalid index.
int MoveF64ViaSpillFI;
-
- /// MipsCallEntry maps.
- StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries;
- ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>>
- GlobalCallEntries;
};
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index f6647e6..28e5a42 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -84,6 +84,16 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const MipsSubtarget &Subtarget = MF->getSubtarget<MipsSubtarget>();
+ const Function *F = MF->getFunction();
+ if (F->hasFnAttribute("interrupt")) {
+ if (Subtarget.hasMips64())
+ return Subtarget.hasMips64r6() ? CSR_Interrupt_64R6_SaveList
+ : CSR_Interrupt_64_SaveList;
+ else
+ return Subtarget.hasMips32r6() ? CSR_Interrupt_32R6_SaveList
+ : CSR_Interrupt_32_SaveList;
+ }
+
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
@@ -284,6 +294,16 @@ getFrameRegister(const MachineFunction &MF) const {
}
bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ // Avoid realigning functions that explicitly do not want to be realigned.
+ // Normally, we should report an error when a function should be dynamically
+ // realigned but also has the attribute no-realign-stack. Unfortunately,
+ // with this attribute, MachineFrameInfo clamps each new object's alignment
+ // to that of the stack's alignment as specified by the ABI. As a result,
+ // the information of whether we have objects with larger alignment
+ // requirement than the stack's alignment is already lost at this point.
+ if (!TargetRegisterInfo::canRealignStack(MF))
+ return false;
+
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64;
unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64;
@@ -306,42 +326,3 @@ bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const {
// sized objects.
return MF.getRegInfo().canReserveReg(BP);
}
-
-bool MipsRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- bool CanRealign = canRealignStack(MF);
-
- // Avoid realigning functions that explicitly do not want to be realigned.
- // Normally, we should report an error when a function should be dynamically
- // realigned but also has the attribute no-realign-stack. Unfortunately,
- // with this attribute, MachineFrameInfo clamps each new object's alignment
- // to that of the stack's alignment as specified by the ABI. As a result,
- // the information of whether we have objects with larger alignment
- // requirement than the stack's alignment is already lost at this point.
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
- return false;
-
- const Function *F = MF.getFunction();
- if (F->hasFnAttribute(Attribute::StackAlignment)) {
-#ifdef DEBUG
- if (!CanRealign)
- DEBUG(dbgs() << "It's not possible to realign the stack of the function: "
- << F->getName() << "\n");
-#endif
- return CanRealign;
- }
-
- unsigned StackAlignment = Subtarget.getFrameLowering()->getStackAlignment();
- if (MFI->getMaxAlignment() > StackAlignment) {
-#ifdef DEBUG
- if (!CanRealign)
- DEBUG(dbgs() << "It's not possible to realign the stack of the function: "
- << F->getName() << "\n");
-#endif
- return CanRealign;
- }
-
- return false;
-}
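To make the comment kept in canRealignStack concrete, here is a hedged illustration (mine, not from the patch) of the case it describes: a local whose alignment exceeds the O32 8-byte stack alignment would normally force dynamic realignment, but under the "no-realign-stack" attribute MachineFrameInfo clamps the requested alignment before this hook runs, so no diagnostic can be raised here.

// Illustrative only. With realignment allowed, Buffer raises the frame's
// MaxAlignment to 64 and triggers dynamic stack realignment; with the
// "no-realign-stack" function attribute the request is clamped to the ABI
// stack alignment before canRealignStack is ever consulted.
void consume(void *p);

void over_aligned_local(void) {
  alignas(64) char Buffer[128]; // more than the O32 stack alignment
  consume(Buffer);
}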
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
index ee1f6bc..5de68a2 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -61,9 +61,7 @@ public:
RegScavenger *RS = nullptr) const;
// Stack realignment queries.
- bool canRealignStack(const MachineFunction &MF) const;
-
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
/// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index 096b3be..a4abd62 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -17,6 +17,7 @@
#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -319,6 +320,15 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB,
bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
bool FP64) const {
+ const MachineOperand &Op1 = I->getOperand(1);
+ const MachineOperand &Op2 = I->getOperand(2);
+
+ if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) {
+ unsigned DstReg = I->getOperand(0).getReg();
+ BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg);
+ return true;
+ }
+
// For fpxx and when mfhc1 is not available, use:
// spill + reload via ldc1
//
@@ -335,8 +345,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) ||
(FP64 && !Subtarget.useOddSPReg())) {
unsigned DstReg = I->getOperand(0).getReg();
- unsigned SrcReg = I->getOperand(1).getReg();
- unsigned N = I->getOperand(2).getImm();
+ unsigned SrcReg = Op1.getReg();
+ unsigned N = Op2.getImm();
int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N));
// It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are
@@ -352,8 +362,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB,
// We re-use the same spill slot each time so that the stack frame doesn't
// grow too much in functions with a large number of moves.
int FI = MF.getInfo<MipsFunctionInfo>()->getMoveF64ViaSpillFI(RC);
- TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC,
- &RegInfo, 0);
+ TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, &RegInfo, 0);
TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset);
return true;
}
@@ -376,12 +385,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
*static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ DebugLoc dl;
MipsABIInfo ABI = STI.getABI();
unsigned SP = ABI.GetStackPtr();
unsigned FP = ABI.GetFramePtr();
unsigned ZERO = ABI.GetNullPtr();
- unsigned ADDu = ABI.GetPtrAdduOp();
+ unsigned MOVE = ABI.GetGPRMoveOp();
unsigned ADDiu = ABI.GetPtrAddiuOp();
unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND;
@@ -407,6 +416,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+ if (MF.getFunction()->hasFnAttribute("interrupt"))
+ emitInterruptPrologueStub(MF, MBB);
+
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
if (CSI.size()) {
@@ -491,7 +503,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO)
+ BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO)
.setMIFlag(MachineInstr::FrameSetup);
// emit ".cfi_def_cfa_register $fp"
@@ -514,7 +526,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
if (hasBP(MF)) {
// move $s7, $sp
unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7;
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP)
+ BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP)
.addReg(SP)
.addReg(ZERO);
}
@@ -522,6 +534,135 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void MipsSEFrameLowering::emitInterruptPrologueStub(
+ MachineFunction &MF, MachineBasicBlock &MBB) const {
+
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Report an error if the target doesn't support Mips32r2 or later.
+ // The epilogue relies on the use of the "ehb" to clear execution
+ // hazards. Pre-R2 Mips relies on an implementation-defined number
+ // of "ssnop"s to clear the execution hazard. Support for ssnop hazard
+ // clearing is not provided, so reject that configuration.
+ if (!STI.hasMips32r2())
+ report_fatal_error(
+ "\"interrupt\" attribute is not supported on pre-MIPS32R2 or "
+ "MIPS16 targets.");
+
+ // The GP register contains the "user" value, so we cannot perform
+ // any gp-relative loads until we restore the "kernel" or "system" gp
+ // value. Until such support is written, we shall only accept the static
+ // relocation model.
+ if (STI.getRelocationModel() != Reloc::Static)
+ report_fatal_error("\"interrupt\" attribute is only supported for the "
+ "static relocation model on MIPS at the present time.");
+
+ if (!STI.isABI_O32() || STI.hasMips64())
+ report_fatal_error("\"interrupt\" attribute is only supported for the "
+ "O32 ABI on MIPS32R2+ at the present time.");
+
+ // Perform ISR handling like GCC
+ StringRef IntKind =
+ MF.getFunction()->getFnAttribute("interrupt").getValueAsString();
+ const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass;
+
+ // EIC interrupt handling needs to read the Cause register to disable
+ // interrupts.
+ if (IntKind == "eic") {
+ // Coprocessor registers are always live per se.
+ MBB.addLiveIn(Mips::COP013);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K0)
+ .addReg(Mips::COP013)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EXT), Mips::K0)
+ .addReg(Mips::K0)
+ .addImm(10)
+ .addImm(6)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Fetch and spill EPC
+ MBB.addLiveIn(Mips::COP014);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+ .addReg(Mips::COP014)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+ MipsFI->getISRRegFI(0), PtrRC,
+ STI.getRegisterInfo(), 0);
+
+ // Fetch and spill Status
+ MBB.addLiveIn(Mips::COP012);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+ .addReg(Mips::COP012)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+ MipsFI->getISRRegFI(1), PtrRC,
+ STI.getRegisterInfo(), 0);
+
+ // Build the configuration for disabling lower-priority interrupts. Non-EIC
+ // interrupts are masked with zero; EIC takes the mask from the Cause register.
+ unsigned InsPosition = 8;
+ unsigned InsSize = 0;
+ unsigned SrcReg = Mips::ZERO;
+
+ // If the handler is tied to the EIC, switch the source used to mask off
+ // interrupts to the Cause register.
+ if (IntKind == "eic") {
+ SrcReg = Mips::K0;
+ InsPosition = 10;
+ InsSize = 6;
+ } else
+ InsSize = StringSwitch<unsigned>(IntKind)
+ .Case("sw0", 1)
+ .Case("sw1", 2)
+ .Case("hw0", 3)
+ .Case("hw1", 4)
+ .Case("hw2", 5)
+ .Case("hw3", 6)
+ .Case("hw4", 7)
+ .Case("hw5", 8)
+ .Default(0);
+ assert(InsSize != 0 && "Unknown interrupt type!");
+
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1)
+ .addReg(SrcReg)
+ .addImm(InsPosition)
+ .addImm(InsSize)
+ .addReg(Mips::K1)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Mask off KSU, ERL, EXL
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1)
+ .addReg(Mips::ZERO)
+ .addImm(1)
+ .addImm(4)
+ .addReg(Mips::K1)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Disable the FPU as we are not spilling those register sets.
+ if (!STI.useSoftFloat())
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1)
+ .addReg(Mips::ZERO)
+ .addImm(29)
+ .addImm(1)
+ .addReg(Mips::K1)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Set the new status
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012)
+ .addReg(Mips::K1)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+}
+
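Taken together, the MFC0/EXT/INS/MTC0 sequence above computes a new Status value from the incoming Status (and, for EIC, Cause). The standalone sketch below is my own restatement of that arithmetic under the usual MIPS32 Status/Cause bit layout; it is illustrative only and not part of the patch.

// Standalone sketch (not LLVM code) of the value the prologue stub writes
// back to CP0 Status. Assumed bit positions follow the MIPS32 PRA:
// Cause.RIPL = bits 15..10, Status IM/IPL = bits 15..8 (15..10 for EIC),
// Status KSU/ERL/EXL = bits 4..1, Status.CU1 = bit 29.
#include <cstdint>

// MIPS "ins rd, rs, pos, size": insert the low Size bits of Src into Dst
// at bit position Pos.
static uint32_t ins(uint32_t Dst, uint32_t Src, unsigned Pos, unsigned Size) {
  uint32_t Mask = ((Size < 32 ? (1u << Size) : 0u) - 1u) << Pos;
  return (Dst & ~Mask) | ((Src << Pos) & Mask);
}

// MIPS "ext rt, rs, pos, size": extract Size bits of Src starting at Pos.
static uint32_t ext(uint32_t Src, unsigned Pos, unsigned Size) {
  return (Src >> Pos) & ((Size < 32 ? (1u << Size) : 0u) - 1u);
}

// EIC: copy Cause.RIPL into Status.IPL so only higher-priority interrupts
// stay enabled. Otherwise: zero IM0..IM(MaskBits-1). Then clear KSU, ERL
// and EXL, and for hard-float builds clear CU1 so FP state need not be
// saved.
uint32_t interruptPrologueStatus(uint32_t Status, uint32_t Cause, bool EIC,
                                 unsigned MaskBits, bool HardFloat) {
  uint32_t S = EIC ? ins(Status, ext(Cause, 10, 6), 10, 6)
                   : ins(Status, 0, 8, MaskBits);
  S = ins(S, 0, 1, 4);        // KSU, ERL, EXL
  if (HardFloat)
    S = ins(S, 0, 29, 1);     // CU1
  return S;
}

For the non-EIC kinds, MaskBits corresponds to the StringSwitch value above (1 for "sw0" up to 8 for "hw5").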
void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -533,12 +674,12 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
const MipsRegisterInfo &RegInfo =
*static_cast<const MipsRegisterInfo *>(STI.getRegisterInfo());
- DebugLoc dl = MBBI->getDebugLoc();
+ DebugLoc DL = MBBI->getDebugLoc();
MipsABIInfo ABI = STI.getABI();
unsigned SP = ABI.GetStackPtr();
unsigned FP = ABI.GetFramePtr();
unsigned ZERO = ABI.GetNullPtr();
- unsigned ADDu = ABI.GetPtrAdduOp();
+ unsigned MOVE = ABI.GetGPRMoveOp();
// if framepointer enabled, restore the stack pointer.
if (hasFP(MF)) {
@@ -549,7 +690,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
--I;
// Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
+ BuildMI(MBB, I, DL, TII.get(MOVE), SP).addReg(FP).addReg(ZERO);
}
if (MipsFI->callsEhReturn()) {
@@ -568,6 +709,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+ if (MF.getFunction()->hasFnAttribute("interrupt"))
+ emitInterruptEpilogueStub(MF, MBB);
+
// Get the number of bytes from FrameInfo
uint64_t StackSize = MFI->getStackSize();
@@ -578,13 +722,59 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
}
+void MipsSEFrameLowering::emitInterruptEpilogueStub(
+ MachineFunction &MF, MachineBasicBlock &MBB) const {
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Perform ISR handling like GCC
+ const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass;
+
+ // Disable Interrupts.
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::DI), Mips::ZERO);
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EHB));
+
+ // Restore EPC
+ STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1,
+ MipsFI->getISRRegFI(0), PtrRC,
+ STI.getRegisterInfo());
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP014)
+ .addReg(Mips::K1)
+ .addImm(0);
+
+ // Restore Status
+ STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1,
+ MipsFI->getISRRegFI(1), PtrRC,
+ STI.getRegisterInfo());
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012)
+ .addReg(Mips::K1)
+ .addImm(0);
+}
+
+int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ MipsABIInfo ABI = STI.getABI();
+
+ if (MFI->isFixedObjectIndex(FI))
+ FrameReg = hasFP(MF) ? ABI.GetFramePtr() : ABI.GetStackPtr();
+ else
+ FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr();
+
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
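A quick worked example of the formula in getFrameIndexReference, assuming (as holds for the in-tree Mips configuration, to the best of my knowledge) that getOffsetOfLocalArea() and the MachineFrameInfo offset adjustment are both zero:

// Illustrative arithmetic only: a local object placed at object offset
// -16 in a 32-byte frame is addressed at FrameReg + 16, where FrameReg is
// $sp/$fp for fixed objects and $sp (or $s7) otherwise, as selected above.
#include <cassert>

int main() {
  const int ObjectOffset = -16, StackSize = 32;
  const int LocalAreaOffset = 0, OffsetAdjustment = 0;
  int Offset = ObjectOffset + StackSize - LocalAreaOffset + OffsetAdjustment;
  assert(Offset == 16);
  return 0;
}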
bool MipsSEFrameLowering::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
- MachineBasicBlock *EntryBlock = MF->begin();
+ MachineBasicBlock *EntryBlock = &MF->front();
const TargetInstrInfo &TII = *STI.getInstrInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
@@ -599,6 +789,26 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
if (!IsRAAndRetAddrIsTaken)
EntryBlock->addLiveIn(Reg);
+ // ISRs require HI/LO to be copied into kernel registers before being
+ // spilled to the stack frame.
+ bool IsLOHI = (Reg == Mips::LO0 || Reg == Mips::LO0_64 ||
+ Reg == Mips::HI0 || Reg == Mips::HI0_64);
+ const Function *Func = MBB.getParent()->getFunction();
+ if (IsLOHI && Func->hasFnAttribute("interrupt")) {
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned Op = 0;
+ if (!STI.getABI().ArePtrs64bit()) {
+ Op = (Reg == Mips::HI0) ? Mips::MFHI : Mips::MFLO;
+ Reg = Mips::K0;
+ } else {
+ Op = (Reg == Mips::HI0) ? Mips::MFHI64 : Mips::MFLO64;
+ Reg = Mips::K0_64;
+ }
+ BuildMI(MBB, MI, DL, TII.get(Op), Mips::K0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// Insert the spill to the stack frame.
bool IsKill = !IsRAAndRetAddrIsTaken;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -622,7 +832,8 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
}
/// Mark \p Reg and all registers aliasing it in the bitset.
-void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) {
+static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs,
+ unsigned Reg) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
SavedRegs.set(*AI);
@@ -648,6 +859,10 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (MipsFI->callsEhReturn())
MipsFI->createEhDataRegsFI();
+ // Create spill slots for Coprocessor 0 registers if function is an ISR.
+ if (MipsFI->isISR())
+ MipsFI->createISRRegFI();
+
// Expand pseudo instructions which load, store or copy accumulators.
// Add an emergency spill slot if a pseudo was expanded.
if (ExpandPseudo(MF).expand()) {
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
index 9cb32e6..63cd3ce 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h
@@ -27,6 +27,9 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -37,8 +40,13 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS) const override;
unsigned ehDataReg(unsigned I) const;
-};
+private:
+ void emitInterruptEpilogueStub(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
+ void emitInterruptPrologueStub(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
+};
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 2ebfbd1..6f001ea 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -136,7 +136,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MachineBasicBlock::iterator I = MBB.begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC;
const MipsABIInfo &ABI = static_cast<const MipsTargetMachine &>(TM).getABI();
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index b319fd0..efe22fb 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1181,6 +1181,10 @@ bool MipsSETargetLowering::isEligibleForTailCallOptimization(
if (!EnableMipsTailCalls)
return false;
+ // The exception has to be cleared with eret, so an ISR cannot make tail calls.
+ if (FI.isISR())
+ return false;
+
// Return false if either the callee or caller has a byval argument.
if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
return false;
@@ -1786,9 +1790,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
case Intrinsic::mips_fadd_w:
- case Intrinsic::mips_fadd_d:
+ case Intrinsic::mips_fadd_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
// Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
case Intrinsic::mips_fceq_w:
case Intrinsic::mips_fceq_d:
@@ -1831,9 +1837,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2), ISD::SETUNE);
case Intrinsic::mips_fdiv_w:
- case Intrinsic::mips_fdiv_d:
+ case Intrinsic::mips_fdiv_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_ffint_u_w:
case Intrinsic::mips_ffint_u_d:
return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
@@ -1856,6 +1864,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::mips_fexp2_w:
case Intrinsic::mips_fexp2_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
EVT ResTy = Op->getValueType(0);
return DAG.getNode(
ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
@@ -1869,11 +1878,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
case Intrinsic::mips_fmul_w:
- case Intrinsic::mips_fmul_d:
+ case Intrinsic::mips_fmul_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_fmsub_w:
case Intrinsic::mips_fmsub_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
EVT ResTy = Op->getValueType(0);
return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1),
DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy,
@@ -1886,9 +1898,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_fsqrt_d:
return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
case Intrinsic::mips_fsub_w:
- case Intrinsic::mips_fsub_d:
+ case Intrinsic::mips_fsub_d: {
+ // TODO: If intrinsics have fast-math-flags, propagate them.
return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
Op->getOperand(2));
+ }
case Intrinsic::mips_ftrunc_u_w:
case Intrinsic::mips_ftrunc_u_d:
return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 786307b..e6f7fe9 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -88,7 +88,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (isMicroMips)
Opc = Mips::MOVE16_MM;
else
- Opc = Mips::ADDu, ZeroReg = Mips::ZERO;
+ Opc = Mips::OR, ZeroReg = Mips::ZERO;
} else if (Mips::CCRRegClass.contains(SrcReg))
Opc = Mips::CFC1;
else if (Mips::FGR32RegClass.contains(SrcReg))
@@ -141,7 +141,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::FMOV_D64;
else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg.
if (Mips::GPR64RegClass.contains(SrcReg))
- Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64;
+ Opc = Mips::OR64, ZeroReg = Mips::ZERO_64;
else if (Mips::HI64RegClass.contains(SrcReg))
Opc = Mips::MFHI64, SrcReg = 0;
else if (Mips::LO64RegClass.contains(SrcReg))
@@ -182,7 +182,6 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
int64_t Offset) const {
DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
unsigned Opc = 0;
@@ -213,6 +212,33 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::ST_W;
else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
Opc = Mips::ST_D;
+ else if (Mips::LO32RegClass.hasSubClassEq(RC))
+ Opc = Mips::SW;
+ else if (Mips::LO64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SD;
+ else if (Mips::HI32RegClass.hasSubClassEq(RC))
+ Opc = Mips::SW;
+ else if (Mips::HI64RegClass.hasSubClassEq(RC))
+ Opc = Mips::SD;
+
+ // HI and LO are normally caller-saved, but they are callee-saved
+ // for interrupt handling.
+ const Function *Func = MBB.getParent()->getFunction();
+ if (Func->hasFnAttribute("interrupt")) {
+ if (Mips::HI32RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFHI), Mips::K0);
+ SrcReg = Mips::K0;
+ } else if (Mips::HI64RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFHI64), Mips::K0_64);
+ SrcReg = Mips::K0_64;
+ } else if (Mips::LO32RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFLO), Mips::K0);
+ SrcReg = Mips::K0;
+ } else if (Mips::LO64RegClass.hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Mips::MFLO64), Mips::K0_64);
+ SrcReg = Mips::K0_64;
+ }
+ }
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
@@ -228,6 +254,11 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
unsigned Opc = 0;
+ const Function *Func = MBB.getParent()->getFunction();
+ bool ReqIndirectLoad = Func->hasFnAttribute("interrupt") &&
+ (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 ||
+ DestReg == Mips::HI0 || DestReg == Mips::HI0_64);
+
if (Mips::GPR32RegClass.hasSubClassEq(RC))
Opc = Mips::LW;
else if (Mips::GPR64RegClass.hasSubClassEq(RC))
@@ -254,10 +285,44 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = Mips::LD_W;
else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64))
Opc = Mips::LD_D;
+ else if (Mips::HI32RegClass.hasSubClassEq(RC))
+ Opc = Mips::LW;
+ else if (Mips::HI64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LD;
+ else if (Mips::LO32RegClass.hasSubClassEq(RC))
+ Opc = Mips::LW;
+ else if (Mips::LO64RegClass.hasSubClassEq(RC))
+ Opc = Mips::LD;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset)
- .addMemOperand(MMO);
+
+ if (!ReqIndirectLoad)
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMemOperand(MMO);
+ else {
+ // Load HI/LO through K0. The destination HI/LO register is implied by
+ // the MTHI/MTLO opcode rather than encoded as an explicit operand.
+ unsigned Reg = Mips::K0;
+ unsigned LdOp = Mips::MTLO;
+ if (DestReg == Mips::HI0)
+ LdOp = Mips::MTHI;
+
+ if (Subtarget.getABI().ArePtrs64bit()) {
+ Reg = Mips::K0_64;
+ if (DestReg == Mips::HI0_64)
+ LdOp = Mips::MTHI64;
+ else
+ LdOp = Mips::MTLO64;
+ }
+
+ BuildMI(MBB, I, DL, get(Opc), Reg)
+ .addFrameIndex(FI)
+ .addImm(Offset)
+ .addMemOperand(MMO);
+ BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg);
+ }
}
bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
@@ -271,6 +336,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case Mips::RetRA:
expandRetRA(MBB, MI);
break;
+ case Mips::ERet:
+ expandERet(MBB, MI);
+ break;
case Mips::PseudoMFHI:
Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
expandPseudoMFHiLo(MBB, MI, Opc);
@@ -360,7 +428,7 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
MipsABIInfo ABI = Subtarget.getABI();
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
unsigned ADDu = ABI.GetPtrAdduOp();
unsigned ADDiu = ABI.GetPtrAddiuOp();
@@ -438,6 +506,11 @@ void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA);
}
+void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET));
+}
+
std::pair<bool, bool>
MipsSEInstrInfo::compareOpndSize(unsigned Opc,
const MachineFunction &MF) const {
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
index bebbabf..5d73545 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h
@@ -82,6 +82,8 @@ private:
void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
+ void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
+
std::pair<bool, bool> compareOpndSize(unsigned Opc,
const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index 132c3a1..b1e2885 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -126,17 +126,19 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
}
bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex);
-
+ bool IsISRRegFI = MipsFI->isISRRegFI(FrameIndex);
// The following stack frame objects are always referenced relative to $sp:
// 1. Outgoing arguments.
// 2. Pointer to dynamically allocated stack space.
// 3. Locations for callee-saved registers.
// 4. Locations for eh data registers.
+ // 5. Locations for ISR saved Coprocessor 0 registers 12 & 14.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
unsigned FrameReg;
- if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI)
+ if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI ||
+ IsISRRegFI)
FrameReg = ABI.GetStackPtr();
else if (RegInfo->needsStackRealignment(MF)) {
if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex))
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index 54b5d28..37f9e49 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -16,8 +16,8 @@ def IMULDIV : FuncUnit;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for Mips
//===----------------------------------------------------------------------===//
-def IIAlu : InstrItinClass;
-def IIBranch : InstrItinClass;
+// IIM16Alu is a placeholder class for most MIPS16 instructions.
+def IIM16Alu : InstrItinClass;
def IIPseudo : InstrItinClass;
def II_ABS : InstrItinClass;
@@ -28,7 +28,19 @@ def II_ADD_D : InstrItinClass;
def II_ADD_S : InstrItinClass;
def II_AND : InstrItinClass;
def II_ANDI : InstrItinClass;
+def II_B : InstrItinClass;
def II_BADDU : InstrItinClass;
+def II_BBIT : InstrItinClass; // bbit[01], bbit[01]32
+def II_BC : InstrItinClass;
+def II_BC1F : InstrItinClass;
+def II_BC1FL : InstrItinClass;
+def II_BC1T : InstrItinClass;
+def II_BC1TL : InstrItinClass;
+def II_BCC : InstrItinClass; // beq and bne
+def II_BCCZ : InstrItinClass; // b[gl][et]z
+def II_BCCZAL : InstrItinClass; // bgezal and bltzal
+def II_BCCZALS : InstrItinClass; // bgezals and bltzals
+def II_BCCZC : InstrItinClass; // beqzc, bnezc
def II_CEIL : InstrItinClass;
def II_CFC1 : InstrItinClass;
def II_CLO : InstrItinClass;
@@ -68,21 +80,39 @@ def II_DSUB : InstrItinClass;
def II_EXT : InstrItinClass; // Any EXT instruction
def II_FLOOR : InstrItinClass;
def II_INS : InstrItinClass; // Any INS instruction
+def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo.
+def II_J : InstrItinClass;
+def II_JAL : InstrItinClass;
+def II_JALR : InstrItinClass;
+def II_JALRC : InstrItinClass;
+def II_JALRS : InstrItinClass;
+def II_JALS : InstrItinClass;
+def II_JR : InstrItinClass;
+def II_JRADDIUSP : InstrItinClass;
+def II_JRC : InstrItinClass;
+def II_ReturnPseudo : InstrItinClass; // Return pseudo.
def II_LB : InstrItinClass;
+def II_LBE : InstrItinClass;
def II_LBU : InstrItinClass;
+def II_LBUE : InstrItinClass;
def II_LD : InstrItinClass;
def II_LDC1 : InstrItinClass;
def II_LDL : InstrItinClass;
def II_LDR : InstrItinClass;
def II_LDXC1 : InstrItinClass;
def II_LH : InstrItinClass;
+def II_LHE : InstrItinClass;
def II_LHU : InstrItinClass;
+def II_LHUE : InstrItinClass;
def II_LUI : InstrItinClass;
def II_LUXC1 : InstrItinClass;
def II_LW : InstrItinClass;
+def II_LWE : InstrItinClass;
def II_LWC1 : InstrItinClass;
def II_LWL : InstrItinClass;
+def II_LWLE : InstrItinClass;
def II_LWR : InstrItinClass;
+def II_LWRE : InstrItinClass;
def II_LWU : InstrItinClass;
def II_LWXC1 : InstrItinClass;
def II_MADD : InstrItinClass;
@@ -134,6 +164,7 @@ def II_ROTRV : InstrItinClass;
def II_ROUND : InstrItinClass;
def II_SAVE : InstrItinClass;
def II_SB : InstrItinClass;
+def II_SBE : InstrItinClass;
def II_SD : InstrItinClass;
def II_SDC1 : InstrItinClass;
def II_SDL : InstrItinClass;
@@ -144,6 +175,7 @@ def II_SEH : InstrItinClass;
def II_SEQ_SNE : InstrItinClass; // seq and sne
def II_SEQI_SNEI : InstrItinClass; // seqi and snei
def II_SH : InstrItinClass;
+def II_SHE : InstrItinClass;
def II_SLL : InstrItinClass;
def II_SLLV : InstrItinClass;
def II_SLTI_SLTIU : InstrItinClass; // slti and sltiu
@@ -159,11 +191,15 @@ def II_SUB_D : InstrItinClass;
def II_SUB_S : InstrItinClass;
def II_SUXC1 : InstrItinClass;
def II_SW : InstrItinClass;
+def II_SWE : InstrItinClass;
def II_SWC1 : InstrItinClass;
def II_SWL : InstrItinClass;
+def II_SWLE : InstrItinClass;
def II_SWR : InstrItinClass;
+def II_SWRE : InstrItinClass;
def II_SWXC1 : InstrItinClass;
def II_TRUNC : InstrItinClass;
+def II_WSBH : InstrItinClass;
def II_XOR : InstrItinClass;
def II_XORI : InstrItinClass;
@@ -171,7 +207,7 @@ def II_XORI : InstrItinClass;
// Mips Generic instruction itineraries.
//===----------------------------------------------------------------------===//
def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
- InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIM16Alu , [InstrStage<1, [ALU]>]>,
InstrItinData<II_ADDI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_ADDIU , [InstrStage<1, [ALU]>]>,
InstrItinData<II_ADDU , [InstrStage<1, [ALU]>]>,
@@ -240,7 +276,29 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_SAVE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SEQ_SNE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SEQI_SNEI , [InstrStage<1, [ALU]>]>,
- InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_B , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BBIT , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1F , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1FL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1T , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC1TL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZ , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZAL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZALS , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BCCZC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_IndirectBranchPseudo, [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_J , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JAL , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALRC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALRS , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JALS , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JRADDIUSP , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_JRC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_ReturnPseudo , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DMUL , [InstrStage<17, [IMULDIV]>]>,
InstrItinData<II_DMULT , [InstrStage<17, [IMULDIV]>]>,
InstrItinData<II_DMULTU , [InstrStage<17, [IMULDIV]>]>,
@@ -313,3 +371,5 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTHC1 , [InstrStage<2, [ALU]>]>
]>;
+
+include "MipsScheduleP5600.td"
diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td b/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td
new file mode 100644
index 0000000..d32ae4f
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td
@@ -0,0 +1,392 @@
+//==- MipsScheduleP5600.td - P5600 Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def MipsP5600Model : SchedMachineModel {
+ int IssueWidth = 2; // 2x dispatched per cycle
+ int MicroOpBufferSize = 48; // min(48, 48, 64)
+ int LoadLatency = 4;
+ int MispredictPenalty = 8; // TODO: Estimated
+
+ let CompleteModel = 1;
+}
+
+let SchedModel = MipsP5600Model in {
+
+// ALQ Pipelines
+// =============
+
+def P5600ALQ : ProcResource<1> { let BufferSize = 16; }
+def P5600IssueALU : ProcResource<1> { let Super = P5600ALQ; }
+
+// ALU Pipeline
+// ------------
+
+def P5600WriteALU : SchedWriteRes<[P5600IssueALU]>;
+
+// and, lui, nor, or, slti, sltiu, sub, subu, xor
+def : ItinRW<[P5600WriteALU],
+ [II_AND, II_LUI, II_NOR, II_OR, II_SLTI_SLTIU, II_SUBU, II_XOR]>;
+
+// AGQ Pipelines
+// =============
+
+def P5600AGQ : ProcResource<3> { let BufferSize = 16; }
+def P5600IssueAL2 : ProcResource<1> { let Super = P5600AGQ; }
+def P5600IssueCTISTD : ProcResource<1> { let Super = P5600AGQ; }
+def P5600IssueLDST : ProcResource<1> { let Super = P5600AGQ; }
+
+def P5600AL2Div : ProcResource<1>;
+// Pseudo-resource used to block CTISTD when handling multi-pipeline splits.
+def P5600CTISTD : ProcResource<1>;
+
+// CTISTD Pipeline
+// ---------------
+
+def P5600WriteJump : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]>;
+def P5600WriteJumpAndLink : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]> {
+ let Latency = 2;
+}
+
+// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx,
+// jalr, jr.hb, jr
+def : ItinRW<[P5600WriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J, II_JR]>;
+def : ItinRW<[P5600WriteJumpAndLink], [II_JAL, II_JALR]>;
+
+// LDST Pipeline
+// -------------
+
+def P5600WriteLoad : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 4;
+}
+
+def P5600WriteLoadShifted : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> {
+ let Latency = 4;
+}
+
+def P5600WritePref : SchedWriteRes<[P5600IssueLDST]>;
+
+def P5600WriteStore : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> {
+ // FIXME: This is a bit pessimistic. P5600CTISTD is only needed during
+ // cycle 2, but ResourceCycles reserves it for cycles 0, 1, and 2.
+ let ResourceCycles = [ 1, 3 ];
+}
+
+def P5600WriteGPRFromBypass : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 2;
+}
+
+def P5600WriteStoreFromOtherUnits : SchedWriteRes<[P5600IssueLDST]>;
+def P5600WriteLoadToOtherUnits : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 0;
+}
+
+// l[bhw], l[bh]u, ll
+def : ItinRW<[P5600WriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LWU]>;
+
+// lw[lr]
+def : ItinRW<[P5600WriteLoadShifted], [II_LWL, II_LWR]>;
+
+// s[bhw], sw[lr]
+def : ItinRW<[P5600WriteStore], [II_SB, II_SH, II_SW, II_SWL, II_SWR]>;
+
+// pref
+// (this instruction does not exist in the backend yet)
+def : ItinRW<[P5600WritePref], []>;
+
+// sc
+// (this instruction does not exist in the backend yet)
+def : ItinRW<[P5600WriteStore], []>;
+
+// LDST is also used in moves from general purpose registers to floating point
+// and MSA.
+def P5600WriteMoveGPRToOtherUnits : SchedWriteRes<[P5600IssueLDST]> {
+ let Latency = 0;
+}
+
+// AL2 Pipeline
+// ------------
+
+def P5600WriteAL2 : SchedWriteRes<[P5600IssueAL2]>;
+def P5600WriteAL2BitExt : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; }
+def P5600WriteAL2ShadowMov : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; }
+def P5600WriteAL2CondMov : SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> {
+ let Latency = 2;
+}
+def P5600WriteAL2Div : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> {
+ // Estimated worst case
+ let Latency = 34;
+ let ResourceCycles = [1, 34];
+}
+def P5600WriteAL2DivU : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> {
+ // Estimated worst case
+ let Latency = 34;
+ let ResourceCycles = [1, 34];
+}
+def P5600WriteAL2Mul : SchedWriteRes<[P5600IssueAL2]> { let Latency = 3; }
+def P5600WriteAL2Mult: SchedWriteRes<[P5600IssueAL2]> { let Latency = 5; }
+def P5600WriteAL2MAdd: SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> {
+ let Latency = 5;
+}
+
+// clo, clz, di, mfhi, mflo
+def : ItinRW<[P5600WriteAL2], [II_CLO, II_CLZ, II_MFHI_MFLO]>;
+
+// ehb, rdhwr, rdpgpr, wrpgpr, wsbh
+def : ItinRW<[P5600WriteAL2ShadowMov], [II_RDHWR]>;
+
+// mov[nz]
+def : ItinRW<[P5600WriteAL2CondMov], [II_MOVN, II_MOVZ]>;
+
+// divu?
+def : ItinRW<[P5600WriteAL2Div], [II_DIV]>;
+def : ItinRW<[P5600WriteAL2DivU], [II_DIVU]>;
+
+// mul
+def : ItinRW<[P5600WriteAL2Mul], [II_MUL]>;
+// multu?
+def : ItinRW<[P5600WriteAL2Mult], [II_MULT, II_MULTU]>;
+// maddu?, msubu?, mthi, mtlo
+def : ItinRW<[P5600WriteAL2MAdd],
+ [II_MADD, II_MADDU, II_MSUB, II_MSUBU, II_MTHI_MTLO]>;
+
+// ext, ins
+def : ItinRW<[P5600WriteAL2BitExt],
+ [II_EXT, II_INS]>;
+
+// Either ALU or AL2 Pipelines
+// ---------------------------
+//
+// Some instructions can choose between ALU and AL2, but once dispatched to
+// ALQ or AGQ respectively they are committed to that path.
+// The decision is based on the outcome of the most recent selection when the
+// choice was last available. For now, we assume ALU is always chosen.
+
+def P5600WriteEitherALU : SchedWriteVariant<
+ // FIXME: Implement selection predicate
+ [SchedVar<SchedPredicate<[{1}]>, [P5600WriteALU]>,
+ SchedVar<SchedPredicate<[{0}]>, [P5600WriteAL2]>
+ ]>;
+
+// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu,
+// xori
+def : ItinRW<[P5600WriteEitherALU],
+ [II_ADDI, II_ADDIU, II_ANDI, II_ORI, II_ROTR, II_SEB, II_SEH,
+ II_SLT_SLTU, II_SLL, II_SRA, II_SRL, II_XORI, II_ADDU, II_SLLV,
+ II_SRAV, II_SRLV]>;
+
+// FPU Pipelines
+// =============
+
+def P5600FPQ : ProcResource<3> { let BufferSize = 16; }
+def P5600IssueFPUS : ProcResource<1> { let Super = P5600FPQ; }
+def P5600IssueFPUL : ProcResource<1> { let Super = P5600FPQ; }
+def P5600IssueFPULoad : ProcResource<1> { let Super = P5600FPQ; }
+
+def P5600FPUDivSqrt : ProcResource<2>;
+
+def P5600WriteFPUS : SchedWriteRes<[P5600IssueFPUS]>;
+def P5600WriteFPUL : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 4; }
+def P5600WriteFPUL_MADDSUB : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 6; }
+def P5600WriteFPUDivS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 23 / 27
+ let Latency = 23; // Using common case
+ let ResourceCycles = [ 1, 23 ];
+}
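A rough reading of the numbers above (my interpretation, not something the patch states): Latency is the per-result latency of a div.s, while the 23 ResourceCycles on P5600FPUDivSqrt bound throughput; since that resource declares two units, the steady-state cost comes out near 23/2 cycles per divide.

// Back-of-the-envelope arithmetic only, under the assumptions stated in
// the lead-in; it is not derived from the scheduler implementation.
#include <cstdio>

int main() {
  const double Latency = 23.0;        // cycles until a div.s result is ready
  const double ResourceCycles = 23.0; // cycles the divider is held
  const double Units = 2.0;           // P5600FPUDivSqrt : ProcResource<2>
  std::printf("result latency: %.0f cycles\n", Latency);
  std::printf("approx. reciprocal throughput: %.1f cycles per div.s\n",
              ResourceCycles / Units);
  return 0;
}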
+def P5600WriteFPUDivD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 31 / 35
+ let Latency = 31; // Using common case
+ let ResourceCycles = [ 1, 31 ];
+}
+def P5600WriteFPURcpS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 19 / 23
+ let Latency = 19; // Using common case
+ let ResourceCycles = [ 1, 19 ];
+}
+def P5600WriteFPURcpD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 31
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPURsqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 27
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPURsqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 31
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPUSqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 27 / 31
+ let Latency = 27; // Using common case
+ let ResourceCycles = [ 1, 27 ];
+}
+def P5600WriteFPUSqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 35 / 39
+ let Latency = 35; // Using common case
+ let ResourceCycles = [ 1, 35 ];
+}
+def P5600WriteMSAShortLogic : SchedWriteRes<[P5600IssueFPUS]>;
+def P5600WriteMSAShortInt : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 2; }
+def P5600WriteMoveOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPUS]>;
+
+// FPUS is also used in moves from floating point and MSA registers to general
+// purpose registers.
+def P5600WriteMoveFPUSToOtherUnits : SchedWriteRes<[P5600IssueFPUS]> {
+ let Latency = 0;
+}
+
+// FPUL is also used in moves from floating point and MSA registers to general
+// purpose registers.
+def P5600WriteMoveFPULToOtherUnits : SchedWriteRes<[P5600IssueFPUL]>;
+
+// Short Pipe
+// ----------
+//
+// abs.[ds], abs.ps, bc1[tf]l?, mov[tf].[ds], mov[tf], mov.[ds], [cm][ft]c1,
+// m[ft]hc1, neg.[ds], neg.ps, nor.v, nori.b, or.v, ori.b, xor.v, xori.b,
+// sdxc1, sdc1, st.[bhwd], swc1, swxc1
+def : ItinRW<[P5600WriteFPUS], [II_ABS, II_MOVF_D, II_MOVF_S, II_MOVT_D,
+ II_MOVT_S, II_MOV_D, II_MOV_S, II_NEG]>;
+
+// adds_a.[bhwd], adds_[asu].[bhwd], addvi?.[bhwd], asub_[us].[bhwd],
+// aver?_[us].[bhwd]
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADD_A_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDS_[ASU]_[BHWD]$")>;
+// TODO: ADDVI_[BHW] might be 1 cycle latency rather than 2. Need to confirm it.
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>;
+
+// and.v, andi.b, move.v, ldi.[bhwd]
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^MOVE_V$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+
+// Long Pipe
+// ----------
+//
+// add.[ds], add.ps, cvt.d.[sw], cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps,
+// cvt.ps.[sw], c.<cc>.[ds], c.<cc>.ps, mul.[ds], mul.ps, sub.[ds], sub.ps,
+// trunc.w.[ds], trunc.w.ps
+def : ItinRW<[P5600WriteFPUL],
+ [II_ADD_D, II_ADD_S, II_CVT, II_C_CC_D, II_C_CC_S, II_MUL_D,
+ II_MUL_S, II_SUB_D, II_SUB_S, II_TRUNC]>;
+
+// div.[ds], div.ps
+def : ItinRW<[P5600WriteFPUDivS], [II_DIV_S]>;
+def : ItinRW<[P5600WriteFPUDivD], [II_DIV_D]>;
+
+// sqrt.[ds], sqrt.ps
+def : ItinRW<[P5600WriteFPUSqrtS], [II_SQRT_S]>;
+def : ItinRW<[P5600WriteFPUSqrtD], [II_SQRT_D]>;
+
+// madd.[ds], msub.[ds], nmadd.[ds], nmsub.[ds],
+// Operand 0 is read on cycle 5. All other operands are read on cycle 0.
+def : ItinRW<[SchedReadAdvance<5>, P5600WriteFPUL_MADDSUB],
+ [II_MADD_D, II_MADD_S, II_MSUB_D, II_MSUB_S, II_NMADD_D,
+ II_NMADD_S, II_NMSUB_D, II_NMSUB_S]>;
+
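A small illustration (assumptions mine) of what SchedReadAdvance<5> buys here: the accumulator operand of madd.[ds]/msub.[ds] is not needed until cycle 5, so a producer feeding only that operand is seen with a much smaller effective latency than one feeding a multiplicand.

// Illustrative arithmetic only. ProducerLatency = 6 matches the
// P5600WriteFPUL_MADDSUB latency defined earlier in this file; ReadAdvance
// mirrors the SchedReadAdvance<5> above.
#include <algorithm>
#include <cstdio>

int main() {
  const int ProducerLatency = 6;
  const int ReadAdvance = 5;
  std::printf("effective latency into the accumulator operand: %d cycle(s)\n",
              std::max(ProducerLatency - ReadAdvance, 0));
  std::printf("effective latency into a multiplicand operand: %d cycle(s)\n",
              ProducerLatency);
  return 0;
}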
+// madd.ps, msub.ps, nmadd.ps, nmsub.ps
+// Operands 0 and 1 are read on cycle 5. All others are read on cycle 0.
+// (none of these instructions exist in the backend yet)
+
+// Load Pipe
+// ---------
+//
+// This is typically used in conjunction with the load pipeline under the AGQ.
+// All the instructions are in the 'Tricky Instructions' section.
+
+def P5600WriteLoadOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPULoad]> {
+ let Latency = 4;
+}
+
+// Tricky Instructions
+// ===================
+//
+// These instructions are split across multiple uops (in different pipelines)
+// that must cooperate to complete the operation
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteMoveGPRToFPU : WriteSequence<[P5600WriteMoveGPRToOtherUnits,
+ P5600WriteMoveOtherUnitsToFPU]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteMoveFPUToGPR : WriteSequence<[P5600WriteMoveFPUSToOtherUnits,
+ P5600WriteGPRFromBypass]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteStoreFPUS : WriteSequence<[P5600WriteMoveFPUSToOtherUnits,
+ P5600WriteStoreFromOtherUnits]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteStoreFPUL : WriteSequence<[P5600WriteMoveFPULToOtherUnits,
+ P5600WriteStoreFromOtherUnits]>;
+
+// FIXME: This isn't quite right since the implementation of WriteSequence
+// currently aggregates the resources and ignores the exact cycle they are
+// used.
+def P5600WriteLoadFPU : WriteSequence<[P5600WriteLoadToOtherUnits,
+ P5600WriteLoadOtherUnitsToFPU]>;
+
+// ctc1, mtc1, mthc1
+def : ItinRW<[P5600WriteMoveGPRToFPU], [II_CTC1, II_MTC1, II_MTHC1]>;
+
+// bc1[ft], cfc1, mfc1, mfhc1, movf, movt
+def : ItinRW<[P5600WriteMoveFPUToGPR],
+ [II_BC1F, II_BC1T, II_CFC1, II_MFC1, II_MFHC1, II_MOVF, II_MOVT]>;
+
+// swc1, swxc1, st.[bhwd]
+def : ItinRW<[P5600WriteStoreFPUS], [II_SWC1, II_SWXC1]>;
+def : InstRW<[P5600WriteStoreFPUS], (instregex "^ST_[BHWD]$")>;
+
+// movn.[ds], movz.[ds]
+def : ItinRW<[P5600WriteStoreFPUL], [II_MOVN_D, II_MOVN_S, II_MOVZ_D, II_MOVZ_S]>;
+
+// l[dw]x?c1, ld.[bhwd]
+def : ItinRW<[P5600WriteLoadFPU], [II_LDC1, II_LDXC1, II_LWC1, II_LWXC1]>;
+def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>;
+
+// Unsupported Instructions
+// ========================
+//
+// The following instruction classes are never valid on P5600.
+// II_DADDIU, II_DADDU, II_DMFC1, II_DMTC1, II_DMULT, II_DMULTU, II_DROTR,
+// II_DROTR32, II_DROTRV, II_DDIV, II_DSLL, II_DSLL32, II_DSLLV, II_DSRA,
+// II_DSRA32, II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV, II_DSUBU, II_DDIVU,
+// II_JALRC, II_LD, II_LD[LR], II_LUXC1, II_RESTORE, II_SAVE, II_SD, II_SDC1,
+// II_SDL, II_SDR, II_SDXC1
+//
+// The following instructions are never valid on P5600.
+// addq.ph, rdhwr, repl.ph, repl.qb, subq.ph, subu_s.qb
+//
+// Guesswork
+// =========
+//
+// This section is largely temporary guesswork.
+
+// ceil.[lw].[ds], floor.[lw].[ds]
+// Reason behind guess: trunc.[lw].ds and the various cvt's are in FPUL
+def : ItinRW<[P5600WriteFPUL], [II_CEIL, II_FLOOR, II_ROUND]>;
+
+// rotrv
+// Reason behind guess: rotr is in the same category and the two register forms
+// generally follow the immediate forms in this category
+def : ItinRW<[P5600WriteEitherALU], [II_ROTRV]>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 471b6e1..8a18b51 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -69,8 +69,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
- HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
- HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(),
+ HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
+ Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasEVA(false), TM(TM),
+ TargetTriple(TT), TSInfo(),
InstrInfo(
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
index 1db8881..fbb01fe 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h
@@ -42,9 +42,15 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
Mips3, Mips4, Mips5, Mips64, Mips64r2, Mips64r3, Mips64r5, Mips64r6
};
+ enum class CPU { P5600 };
+
// Mips architecture version
MipsArchEnum MipsArchVersion;
+ // Processor implementation (unused but required to exist by
+ // tablegen-erated code).
+ CPU ProcImpl;
+
// IsLittle - The target is Little Endian
bool IsLittle;
@@ -116,8 +122,8 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// InMicroMips -- can process MicroMips instructions
bool InMicroMipsMode;
- // HasDSP, HasDSPR2 -- supports DSP ASE.
- bool HasDSP, HasDSPR2;
+ // HasDSP, HasDSPR2, HasDSPR3 -- supports DSP ASE.
+ bool HasDSP, HasDSPR2, HasDSPR3;
// Allow mixed Mips16 and Mips32 in one source file
bool AllowMixed16_32;
@@ -130,6 +136,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// HasMSA -- supports MSA ASE.
bool HasMSA;
+ // UseTCCInDIV -- Enables the use of trapping in the assembler.
+ bool UseTCCInDIV;
+
+ // HasEVA -- supports EVA ASE.
+ bool HasEVA;
+
InstrItineraryData InstrItins;
// We can override the determination of whether we are in mips16 mode
@@ -189,7 +201,7 @@ public:
}
bool hasMips32r5() const {
return (MipsArchVersion >= Mips32r5 && MipsArchVersion < Mips32Max) ||
- hasMips64r2();
+ hasMips64r5();
}
bool hasMips32r6() const {
return (MipsArchVersion >= Mips32r6 && MipsArchVersion < Mips32Max) ||
@@ -228,9 +240,12 @@ public:
}
bool inMicroMipsMode() const { return InMicroMipsMode; }
bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); }
+ bool inMicroMips64r6Mode() const { return InMicroMipsMode && hasMips64r6(); }
bool hasDSP() const { return HasDSP; }
bool hasDSPR2() const { return HasDSPR2; }
+ bool hasDSPR3() const { return HasDSPR3; }
bool hasMSA() const { return HasMSA; }
+ bool hasEVA() const { return HasEVA; }
bool useSmallSection() const { return UseSmallSection; }
bool hasStandardEncoding() const { return !inMips16Mode(); }
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 1c77745..3e63872 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -233,7 +233,7 @@ void MipsPassConfig::addPreRegAlloc() {
}
TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
if (Subtarget->allowMixed16_32()) {
DEBUG(errs() << "No Target Transform Info Pass Added\n");
// FIXME: This is no longer necessary as the TTI returned is per-function.
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index 0f2db60..146f33b 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -76,7 +76,7 @@ bool MipsTargetObjectFile::
IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
SectionKind Kind) const {
return (IsGlobalInSmallSectionImpl(GV, TM) &&
- (Kind.isDataRel() || Kind.isBSS() || Kind.isCommon()));
+ (Kind.isData() || Kind.isBSS() || Kind.isCommon()));
}
/// Return true if this global address should be placed into small data/bss
@@ -107,7 +107,8 @@ IsGlobalInSmallSectionImpl(const GlobalValue *GV,
return false;
Type *Ty = GV->getType()->getElementType();
- return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty));
+ return IsInSmallSection(
+ GV->getParent()->getDataLayout().getTypeAllocSize(Ty));
}
MCSection *
@@ -120,7 +121,7 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
// Handle Small Section classification here.
if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallBSSSection;
- if (Kind.isDataRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
@@ -128,21 +129,20 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
}
/// Return true if this constant should be placed into small data section.
-bool MipsTargetObjectFile::
-IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const {
+bool MipsTargetObjectFile::IsConstantInSmallSection(
+ const DataLayout &DL, const Constant *CN, const TargetMachine &TM) const {
return (static_cast<const MipsTargetMachine &>(TM)
.getSubtargetImpl()
->useSmallSection() &&
- LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(
- CN->getType())));
+ LocalSData && IsInSmallSection(DL.getTypeAllocSize(CN->getType())));
}
-MCSection *
-MipsTargetObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
- if (IsConstantInSmallSection(C, *TM))
+/// Return true if this constant should be placed into small data section.
+MCSection *MipsTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
+ if (IsConstantInSmallSection(DL, C, *TM))
return SmallDataSection;
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::getSectionForConstant(Kind, C);
+ return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C);
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
index 725f2ff..ba04343 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -36,10 +36,10 @@ class MipsTargetMachine;
const TargetMachine &TM) const override;
/// Return true if this constant should be placed into small data section.
- bool IsConstantInSmallSection(const Constant *CN,
+ bool IsConstantInSmallSection(const DataLayout &DL, const Constant *CN,
const TargetMachine &TM) const;
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
index 6ce1be7..b3222f5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
@@ -12,6 +12,7 @@
#include "MCTargetDesc/MipsABIFlagsSection.h"
#include "MCTargetDesc/MipsABIInfo.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -77,8 +78,12 @@ public:
// PIC support
virtual void emitDirectiveCpLoad(unsigned RegNo);
+ virtual void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+ int Offset);
virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg);
+ virtual void emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister);
// FP abiflags directives
virtual void emitDirectiveModuleFP();
@@ -97,18 +102,18 @@ public:
// structure values.
template <class PredicateLibrary>
void updateABIInfo(const PredicateLibrary &P) {
- ABI = &P.getABI();
+ ABI = P.getABI();
ABIFlagsSection.setAllFromPredicates(P);
}
MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
const MipsABIInfo &getABI() const {
- assert(ABI && "ABI hasn't been set!");
+ assert(ABI.hasValue() && "ABI hasn't been set!");
return *ABI;
}
protected:
- const MipsABIInfo *ABI;
+ llvm::Optional<MipsABIInfo> ABI;
MipsABIFlagsSection ABIFlagsSection;
bool GPRInfoSet;
@@ -188,8 +193,12 @@ public:
// PIC support
void emitDirectiveCpLoad(unsigned RegNo) override;
+ void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+ int Offset) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+ void emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) override;
// FP abiflags directives
void emitDirectiveModuleFP() override;
@@ -237,8 +246,12 @@ public:
// PIC support
void emitDirectiveCpLoad(unsigned RegNo) override;
+ void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+ int Offset) override;
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+ void emitDirectiveCpreturn(unsigned SaveLocation,
+ bool SaveLocationIsRegister) override;
void emitMipsAbiFlags();
};
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 02c5a21..f0f223a 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -15,11 +15,9 @@
#define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
-class MCOperand;
class MCSubtargetInfo;
class NVPTXInstPrinter : public MCInstPrinter {
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index b432e06..9ac3c88 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -22,6 +22,7 @@ class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
+
public:
explicit NVPTXMCAsmInfo(const Triple &TheTriple);
};
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index fe28214..e5fae85 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -41,24 +41,6 @@ enum CondCodes {
};
}
-inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
- switch (CC) {
- case NVPTXCC::NE:
- return "ne";
- case NVPTXCC::EQ:
- return "eq";
- case NVPTXCC::LT:
- return "lt";
- case NVPTXCC::LE:
- return "le";
- case NVPTXCC::GT:
- return "gt";
- case NVPTXCC::GE:
- return "ge";
- }
- llvm_unreachable("Unknown condition code");
-}
-
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ecb0f0a..e8c3608 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -355,7 +355,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
if (isABI) {
if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
unsigned size = 0;
- if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+ if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
@@ -635,9 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
return false;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->getName() == "llvm.used")
- return false;
- return true;
+ return GV->getName() != "llvm.used";
}
for (const User *U : C->users())
@@ -682,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
if (!gv->hasInternalLinkage())
return false;
- const PointerType *Pty = gv->getType();
+ PointerType *Pty = gv->getType();
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
return false;
@@ -720,7 +718,7 @@ static bool useFuncSeen(const Constant *C,
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
llvm::DenseMap<const Function *, bool> seenMap;
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
- const Function *F = FI;
+ const Function *F = &*FI;
if (F->isDeclaration()) {
if (F->use_empty())
@@ -870,9 +868,8 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
DenseSet<const GlobalVariable *> GVVisiting;
// Visit each global variable, in order
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
+ for (const GlobalVariable &I : M.globals())
+ VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
assert(GVVisited.size() == M.getGlobalList().size() &&
"Missed a global variable");
@@ -1029,10 +1026,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
GVar->getName().startswith("nvvm."))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
- const PointerType *PTy = GVar->getType();
+ PointerType *PTy = GVar->getType();
Type *ETy = PTy->getElementType();
if (GVar->hasExternalLinkage()) {
@@ -1159,7 +1156,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
if (GVar->getAlignment() == 0)
- O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
else
O << " .align " << GVar->getAlignment();
@@ -1185,9 +1182,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
printScalarConstant(Initializer, O);
}
} else {
- // The frontend adds zero-initializer to variables that don't have an
- // initial value, so skip warning for this case.
- if (!GVar->getInitializer()->isNullValue()) {
+ // The frontend adds zero-initializer to device and constant variables
+ // that don't have an initial value, and UndefValue to shared
+ // variables, so skip warning for this case.
+ if (!GVar->getInitializer()->isNullValue() &&
+ !isa<UndefValue>(GVar->getInitializer())) {
report_fatal_error("initial value of '" + GVar->getName() +
"' is not allowed in addrspace(" +
Twine(PTy->getAddressSpace()) + ")");
@@ -1205,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
- ElementSize = TD->getTypeStoreSize(ETy);
+ ElementSize = DL.getTypeStoreSize(ETy);
      // PTX allows variable initialization only for constant and
// global state spaces.
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
@@ -1299,7 +1298,7 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
}
std::string
-NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
switch (Ty->getTypeID()) {
default:
llvm_unreachable("unexpected type");
@@ -1339,16 +1338,16 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// GlobalVariables are always constant pointers themselves.
- const PointerType *PTy = GVar->getType();
+ PointerType *PTy = GVar->getType();
Type *ETy = PTy->getElementType();
O << ".";
emitPTXAddressSpace(PTy->getAddressSpace(), O);
if (GVar->getAlignment() == 0)
- O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
+ O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
else
O << " .align " << GVar->getAlignment();
@@ -1370,7 +1369,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
- ElementSize = TD->getTypeStoreSize(ETy);
+ ElementSize = DL.getTypeStoreSize(ETy);
O << " .b8 ";
getSymbol(GVar)->print(O, MAI);
O << "[";
@@ -1385,32 +1384,32 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
return;
}
-static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
if (Ty->isSingleValueType())
- return TD->getPrefTypeAlignment(Ty);
+ return DL.getPrefTypeAlignment(Ty);
- const ArrayType *ATy = dyn_cast<ArrayType>(Ty);
+ auto *ATy = dyn_cast<ArrayType>(Ty);
if (ATy)
- return getOpenCLAlignment(TD, ATy->getElementType());
+ return getOpenCLAlignment(DL, ATy->getElementType());
- const StructType *STy = dyn_cast<StructType>(Ty);
+ auto *STy = dyn_cast<StructType>(Ty);
if (STy) {
unsigned int alignStruct = 1;
// Go through each element of the struct and find the
// largest alignment.
for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
Type *ETy = STy->getElementType(i);
- unsigned int align = getOpenCLAlignment(TD, ETy);
+ unsigned int align = getOpenCLAlignment(DL, ETy);
if (align > alignStruct)
alignStruct = align;
}
return alignStruct;
}
- const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
+ auto *FTy = dyn_cast<FunctionType>(Ty);
if (FTy)
- return TD->getPointerPrefAlignment();
- return TD->getPrefTypeAlignment(Ty);
+ return DL.getPointerPrefAlignment();
+ return DL.getPrefTypeAlignment(Ty);
}
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
@@ -1419,13 +1418,8 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
O << "_param_" << paramIndex;
}
-void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
-}
-
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const AttributeSet &PAL = F->getAttributes();
const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
@@ -1433,7 +1427,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
bool first = true;
bool isKernelFunc = llvm::isKernelFunction(*F);
bool isABI = (nvptxSubtarget->getSmVersion() >= 20);
- MVT thePointerTy = TLI->getPointerTy(*TD);
+ MVT thePointerTy = TLI->getPointerTy(DL);
O << "(\n";
@@ -1485,9 +1479,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// size = typeallocsize of element type
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
- align = TD->getABITypeAlignment(Ty);
+ align = DL.getABITypeAlignment(Ty);
- unsigned sz = TD->getTypeAllocSize(Ty);
+ unsigned sz = DL.getTypeAllocSize(Ty);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
@@ -1495,7 +1489,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
continue;
}
// Just a scalar
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
if (isKernelFunc) {
if (PTy) {
// Special handling for pointer arguments to kernel
@@ -1519,7 +1513,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ".ptr .global ";
break;
}
- O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
+ O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
}
printParamName(I, paramIndex, O);
continue;
@@ -1556,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
// param has byVal attribute. So should be a pointer
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
@@ -1566,9 +1560,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// size = typeallocsize of element type
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
- align = TD->getABITypeAlignment(ETy);
+ align = DL.getABITypeAlignment(ETy);
- unsigned sz = TD->getTypeAllocSize(ETy);
+ unsigned sz = DL.getTypeAllocSize(ETy);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
O << "[" << sz << "]";
@@ -1579,7 +1573,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
// Further, if a part is vector, print the above for
// each vector element.
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts);
+ ComputeValueVTs(*TLI, DL, ETy, vtparts);
for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
unsigned elems = 1;
EVT elemtype = vtparts[i];
@@ -1786,10 +1780,10 @@ static void ConvertDoubleToBytes(unsigned char *p, double val) {
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
- int s = TD->getTypeAllocSize(CPV->getType());
+ int s = DL.getTypeAllocSize(CPV->getType());
if (s < Bytes)
s = Bytes;
aggBuffer->addZeros(s);
@@ -1800,7 +1794,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
switch (CPV->getType()->getTypeID()) {
case Type::IntegerTyID: {
- const Type *ETy = CPV->getType();
+ Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
ConvertIntToBytes<>(ptr, c);
@@ -1817,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, *TD))) {
+ ConstantFoldConstantExpression(Cexpr, DL))) {
int int32 = (int)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1839,7 +1833,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, *TD))) {
+ ConstantFoldConstantExpression(Cexpr, DL))) {
long long int64 = (long long)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -1860,7 +1854,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::FloatTyID:
case Type::DoubleTyID: {
const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
- const Type *Ty = CFP->getType();
+ Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
ConvertFloatToBytes(ptr, float32);
@@ -1881,7 +1875,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
const Value *v = Cexpr->stripPointerCasts();
aggBuffer->addSymbol(v, Cexpr);
}
- unsigned int s = TD->getTypeAllocSize(CPV->getType());
+ unsigned int s = DL.getTypeAllocSize(CPV->getType());
aggBuffer->addZeros(s);
break;
}
@@ -1891,7 +1885,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::StructTyID: {
if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV) ||
isa<ConstantStruct>(CPV) || isa<ConstantDataSequential>(CPV)) {
- int ElementSize = TD->getTypeAllocSize(CPV->getType());
+ int ElementSize = DL.getTypeAllocSize(CPV->getType());
bufferAggregateConstant(CPV, aggBuffer);
if (Bytes > ElementSize)
aggBuffer->addZeros(Bytes - ElementSize);
@@ -1909,7 +1903,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
AggBuffer *aggBuffer) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
int Bytes;
// Old constants
@@ -1934,12 +1928,12 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
StructType *ST = cast<StructType>(CPV->getType());
for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
if (i == (e - 1))
- Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
- TD->getTypeAllocSize(ST) -
- TD->getStructLayout(ST)->getElementOffset(i);
+ Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
+ DL.getTypeAllocSize(ST) -
+ DL.getStructLayout(ST)->getElementOffset(i);
else
- Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
- TD->getStructLayout(ST)->getElementOffset(i);
+ Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
+ DL.getStructLayout(ST)->getElementOffset(i);
bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
}
}
@@ -1951,18 +1945,6 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
// buildTypeNameMap - Run through symbol table looking for type names.
//
-bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
-
- std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
-
- if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
- !PI->second.compare("struct._image2d_t") ||
- !PI->second.compare("struct._image3d_t")))
- return true;
-
- return false;
-}
-
bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -2054,7 +2036,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout()))
+ if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
if (C != CE)
return lowerConstantForGV(C, ProcessingGeneric);
@@ -2083,7 +2065,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
case Instruction::GetElementPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Generate a symbolic expression for the byte address
APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
@@ -2109,7 +2091,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
case Instruction::IntToPtr: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Handle casts to pointers by changing them into casts to the appropriate
// integer type. This promotes constant folding and simplifies this code.
@@ -2120,7 +2102,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
}
case Instruction::PtrToInt: {
- const DataLayout &DL = *TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
// Support only foldable casts to/from pointers that can be eliminated by
// changing the pointer to the appropriately sized integer type.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index f6f7685..76bf179 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -212,28 +212,21 @@ private:
MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
- void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {}
-
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = nullptr);
- void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool = false);
- void printParamName(int paramIndex, raw_ostream &O);
void printParamName(Function::const_arg_iterator I, int paramIndex,
raw_ostream &O);
void emitGlobals(const Module &M);
void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, raw_ostream &);
- void emitFunctionExternParamList(const MachineFunction &MF);
void emitFunctionParamList(const Function *, raw_ostream &O);
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
- void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
- bool isImageType(const Type *Ty);
void printReturnValStr(const Function *, raw_ostream &O);
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -271,7 +264,7 @@ private:
// Build the map between type name and ID based on module's type
// symbol table.
- std::map<const Type *, std::string> TypeNameMap;
+ std::map<Type *, std::string> TypeNameMap;
// List of variables demoted to a function scope.
std::map<const Function *, std::vector<const GlobalVariable *> > localDecls;
@@ -282,19 +275,15 @@ private:
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
- std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+ std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const;
void printScalarConstant(const Constant *CPV, raw_ostream &O);
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
- void printOperandProper(const MachineOperand &MO);
-
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
void emitDeclarations(const Module &, raw_ostream &O);
void emitDeclaration(const Function *, raw_ostream &O);
-
- static const char *getRegisterName(unsigned RegNo);
void emitDemotedVars(const Function *, raw_ostream &);
bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 69a229e..95813c8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -98,7 +98,7 @@ private:
/// This reordering exposes to optimizeMemoryInstruction more
/// optimization opportunities on loads and stores.
///
- /// If this function succesfully hoists an eliminable addrspacecast or V is
+ /// If this function successfully hoists an eliminable addrspacecast or V is
/// already such an addrspacecast, it returns the transformed value (which is
/// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
@@ -267,14 +267,14 @@ bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
return false;
bool Changed = false;
- for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) {
+ for (BasicBlock &B : F) {
+ for (Instruction &I : B) {
if (isa<LoadInst>(I)) {
// V = load P
- Changed |= optimizeMemoryInstruction(I, 0);
+ Changed |= optimizeMemoryInstruction(&I, 0);
} else if (isa<StoreInst>(I)) {
// store V, P
- Changed |= optimizeMemoryInstruction(I, 1);
+ Changed |= optimizeMemoryInstruction(&I, 1);
}
}
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 6fd09c4..62ca5e9 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -81,7 +81,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
I != E;) {
- GlobalVariable *GV = I++;
+ GlobalVariable *GV = &*I++;
if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
!llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
!llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
@@ -117,7 +117,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
Value *Operand = II->getOperand(i);
if (isa<Constant>(Operand)) {
II->setOperand(
- i, remapConstant(&M, I, cast<Constant>(Operand), Builder));
+ i, remapConstant(&M, &*I, cast<Constant>(Operand), Builder));
}
}
}
@@ -132,10 +132,8 @@ bool GenericToNVVM::runOnModule(Module &M) {
// Walk through the metadata section and update the debug information
// associated with the global variables in the default address space.
- for (Module::named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end();
- I != E; I++) {
- remapNamedMDNode(VM, I);
+ for (NamedMDNode &I : M.named_metadata()) {
+ remapNamedMDNode(VM, &I);
}
// Walk through the global variable initializers, and replace any use of
@@ -318,9 +316,8 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
NewOperands[0], NewOperands[1]);
case Instruction::FCmp:
// CompareConstantExpr (fcmp)
- assert(false && "Address space conversion should have no effect "
- "on float point CompareConstantExpr (fcmp)!");
- return C;
+ llvm_unreachable("Address space conversion should have no effect "
+                     "on floating point CompareConstantExpr (fcmp)!");
case Instruction::ExtractElement:
// ExtractElementConstantExpr
return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
@@ -364,8 +361,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
NewOperands[0], C->getType());
}
- assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
- return C;
+ llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
}
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 232a611..2d0098b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -530,7 +532,7 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
@@ -544,6 +546,39 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}
+static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
+ unsigned CodeAddrSpace, MachineFunction *F) {
+ // To use non-coherent caching, the load has to be from global
+ // memory and we have to prove that the memory area is not written
+ // to anywhere for the duration of the kernel call, not even after
+ // the load.
+ //
+ // To ensure that there are no writes to the memory, we require the
+ // underlying pointer to be a noalias (__restrict) kernel parameter
+ // that is never used for a write. We can only do this for kernel
+ // functions since from within a device function, we cannot know if
+ // there were or will be writes to the memory from the caller - or we
+ // could, but then we would have to do inter-procedural analysis.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
+ !isKernelFunction(*F->getFunction())) {
+ return false;
+ }
+
+ // We use GetUnderlyingObjects() here instead of
+ // GetUnderlyingObject() mainly because the former looks through phi
+ // nodes while the latter does not. We need to look through phi
+ // nodes to handle pointer induction variables.
+ SmallVector<Value *, 8> Objs;
+ GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
+ Objs, F->getDataLayout());
+ for (Value *Obj : Objs) {
+ auto *A = dyn_cast<const Argument>(Obj);
+ if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
+ }
+
+ return true;
+}
+
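For context, the source-level shape that satisfies this check is a kernel pointer argument the frontend can mark both noalias and read-only; in CUDA terms, a const __restrict__ pointer. A minimal sketch in plain C++ (using the GNU __restrict__ extension, CUDA's __global__ qualifier omitted; the function and parameter names are illustrative only):

    // Loads through 'in' trace back to a read-only, noalias kernel argument,
    // so a load selected here is eligible for the non-coherent ld.global.nc
    // path chosen by SelectLDGLDU.
    static void scale(const float *__restrict__ in, float *__restrict__ out,
                      int n) {
      for (int i = 0; i < n; ++i)
        out[i] = in[i] * 2.0f;
    }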
SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IID) {
@@ -638,6 +673,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD);
+ if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
 // - .volatile is only available for .global and .shared
bool isVolatile = LD->isVolatile();
@@ -872,6 +911,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
+ if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
+ return SelectLDGLDU(N);
+ }
+
// Volatile Setting
 // - .volatile is only available for .global and .shared
bool IsVolatile = MemSD->isVolatile();
@@ -1425,6 +1468,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1474,6 +1518,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1522,6 +1567,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1563,6 +1609,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1612,6 +1659,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1660,6 +1708,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1707,6 +1756,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1756,6 +1806,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1804,6 +1855,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1845,6 +1897,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG) {
switch (EltVT.getSimpleVT().SimpleTy) {
@@ -1894,6 +1947,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
}
}
break;
+ case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1942,6 +1996,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
break;
}
break;
+ case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -5039,7 +5094,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
}
if (!Src)
return false;
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (auto *PT = dyn_cast<PointerType>(Src->getType()))
return (PT->getAddressSpace() == spN);
return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b75cf40..7663696 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -124,6 +124,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// condition branches.
setJumpIsExpensive(true);
+ // Wide divides are _very_ slow. Try to reduce the width of the divide if
+ // possible.
+ addBypassSlowDiv(64, 32);
+
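The bypass registered above works roughly as follows: each 64-bit divide gets a runtime guard, and when both operands fit in 32 bits the cheap 32-bit divide runs instead. A hedged C++ sketch of the resulting control flow (names are illustrative; the real transform is done on IR by the generic slow-division bypass):

    // Rough shape of the code produced by bypassing a slow 64-bit divide.
    static unsigned long long divideBypassed(unsigned long long a,
                                             unsigned long long b) {
      if (((a | b) >> 32) == 0)            // both operands fit in 32 bits
        return (unsigned)a / (unsigned)b;  // fast 32-bit divide
      return a / b;                        // fall back to the wide divide
    }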
// By default, use the Source scheduling
if (sched4reg)
setSchedulingPreference(Sched::RegPressure);
@@ -275,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SELECT);
// Now deduce the information based on the above mentioned
// actions
@@ -910,7 +915,7 @@ std::string NVPTXTargetLowering::getPrototype(
O << "(";
if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) {
unsigned size = 0;
- if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
+ if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
if (size < 32)
size = 32;
@@ -981,7 +986,7 @@ std::string NVPTXTargetLowering::getPrototype(
O << "_";
continue;
}
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ auto *PTy = dyn_cast<PointerType>(Ty);
assert(PTy && "Param with byval attribute should be a pointer type");
Type *ETy = PTy->getElementType();
@@ -1318,7 +1323,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// struct or vector
SmallVector<EVT, 16> vtparts;
SmallVector<uint64_t, 16> Offsets;
- const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
+ auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
vtparts, &Offsets, 0);
@@ -2007,15 +2012,6 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
-SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
- int idx, EVT v) const {
- std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
- std::stringstream suffix;
- suffix << idx;
- *name += suffix.str();
- return DAG.getTargetExternalSymbol(name->c_str(), v);
-}
-
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
std::string ParamSym;
@@ -2029,10 +2025,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
return DAG.getTargetExternalSymbol(SavedStr->c_str(), v);
}
-SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
- return getExtSymb(DAG, ".HLPPARAM", idx);
-}
-
// Check to see if the kernel argument is image*_t or sampler_t
bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
@@ -2040,8 +2032,8 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
"struct._image3d_t",
"struct._sampler_t" };
- const Type *Ty = arg->getType();
- const PointerType *PTy = dyn_cast<PointerType>(Ty);
+ Type *Ty = arg->getType();
+ auto *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
return false;
@@ -2049,14 +2041,11 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
if (!context)
return false;
- const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
+ auto *STy = dyn_cast<StructType>(PTy->getElementType());
const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
- for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
- if (TypeName == specialTypes[i])
- return true;
-
- return false;
+ return std::find(std::begin(specialTypes), std::end(specialTypes),
+ TypeName) != std::end(specialTypes);
}
SDValue NVPTXTargetLowering::LowerFormalArguments(
@@ -2082,10 +2071,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
std::vector<Type *> argTypes;
std::vector<const Argument *> theArgs;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I) {
- theArgs.push_back(I);
- argTypes.push_back(I->getType());
+ for (const Argument &I : F->args()) {
+ theArgs.push_back(&I);
+ argTypes.push_back(I.getType());
}
// argTypes.size() (or theArgs.size()) and Ins.size() need not match.
// Ins.size() will be larger
@@ -2545,20 +2533,6 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint(
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-// NVPTX suuport vector of legal types of any length in Intrinsics because the
-// NVPTX specific type legalizer
-// will legalize them to the PTX supported length.
-bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
- if (isTypeLegal(VT))
- return true;
- if (VT.isVector()) {
- MVT eVT = VT.getVectorElementType();
- if (isTypeLegal(eVT))
- return true;
- }
- return false;
-}
-
static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
switch (Intrinsic) {
default:
@@ -3747,9 +3721,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
// - [immAddr]
if (AM.BaseGV) {
- if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
- return false;
- return true;
+ return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
}
switch (AM.Scale) {
@@ -3820,11 +3792,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
- return 4;
-}
-
//===----------------------------------------------------------------------===//
// NVPTX DAG Combining
//===----------------------------------------------------------------------===//
@@ -4057,6 +4024,67 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Currently this detects patterns for integer min and max and
+ // lowers them to PTX-specific intrinsics that enable hardware
+ // support.
+
+ const SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() != ISD::SETCC) return SDValue();
+
+ const SDValue LHS = Cond.getOperand(0);
+ const SDValue RHS = Cond.getOperand(1);
+ const SDValue True = N->getOperand(1);
+ const SDValue False = N->getOperand(2);
+ if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+ return SDValue();
+
+ const EVT VT = N->getValueType(0);
+ if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
+
+ const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ SDValue Larger; // The larger of LHS and RHS when condition is true.
+ switch (CC) {
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ Larger = RHS;
+ break;
+
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ Larger = LHS;
+ break;
+
+ default:
+ return SDValue();
+ }
+ const bool IsMax = (Larger == True);
+ const bool IsSigned = ISD::isSignedIntSetCC(CC);
+
+ unsigned IntrinsicId;
+ if (VT == MVT::i32) {
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
+ } else {
+ assert(VT == MVT::i64);
+ if (IsSigned)
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
+ else
+ IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
+ }
+
+ SDLoc DL(N);
+ return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
+}
+
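The pattern this targets is the select(setcc(a, b), a, b) form that frontends produce for ordinary min/max expressions; after the combine it becomes a single nvvm min/max intrinsic, which maps onto PTX's native min/max. An illustrative C++ source pattern (the name is made up):

    // Compiles to a select of setcc(sgt, a, b) over i64, which this combine
    // rewrites to the llvm.nvvm.max.ll intrinsic.
    static long long smax(long long a, long long b) {
      return a > b ? a : b;
    }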
enum OperandSignedness {
Signed = 0,
Unsigned,
@@ -4113,25 +4141,16 @@ static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
APInt Val = CI->getAPIntValue();
if (LHSSign == Unsigned) {
- if (Val.isIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isIntN(OptSize);
} else {
- if (Val.isSignedIntN(OptSize)) {
- return true;
- }
- return false;
+ return Val.isSignedIntN(OptSize);
}
} else {
OperandSignedness RHSSign;
if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
return false;
- if (LHSSign != RHSSign)
- return false;
-
- return true;
+ return LHSSign == RHSSign;
}
}
@@ -4247,6 +4266,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformSHLCombine(N, DCI, OptLevel);
case ISD::AND:
return PerformANDCombine(N, DCI);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DCI);
}
return SDValue();
}
@@ -4509,25 +4530,25 @@ void NVPTXTargetLowering::ReplaceNodeResults(
void NVPTXSection::anchor() {}
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
- delete TextSection;
- delete DataSection;
- delete BSSSection;
- delete ReadOnlySection;
-
- delete StaticCtorSection;
- delete StaticDtorSection;
- delete LSDASection;
- delete EHFrameSection;
- delete DwarfAbbrevSection;
- delete DwarfInfoSection;
- delete DwarfLineSection;
- delete DwarfFrameSection;
- delete DwarfPubTypesSection;
- delete DwarfDebugInlineSection;
- delete DwarfStrSection;
- delete DwarfLocSection;
- delete DwarfARangesSection;
- delete DwarfRangesSection;
+ delete static_cast<NVPTXSection *>(TextSection);
+ delete static_cast<NVPTXSection *>(DataSection);
+ delete static_cast<NVPTXSection *>(BSSSection);
+ delete static_cast<NVPTXSection *>(ReadOnlySection);
+
+ delete static_cast<NVPTXSection *>(StaticCtorSection);
+ delete static_cast<NVPTXSection *>(StaticDtorSection);
+ delete static_cast<NVPTXSection *>(LSDASection);
+ delete static_cast<NVPTXSection *>(EHFrameSection);
+ delete static_cast<NVPTXSection *>(DwarfAbbrevSection);
+ delete static_cast<NVPTXSection *>(DwarfInfoSection);
+ delete static_cast<NVPTXSection *>(DwarfLineSection);
+ delete static_cast<NVPTXSection *>(DwarfFrameSection);
+ delete static_cast<NVPTXSection *>(DwarfPubTypesSection);
+ delete static_cast<const NVPTXSection *>(DwarfDebugInlineSection);
+ delete static_cast<NVPTXSection *>(DwarfStrSection);
+ delete static_cast<NVPTXSection *>(DwarfLocSection);
+ delete static_cast<NVPTXSection *>(DwarfARangesSection);
+ delete static_cast<NVPTXSection *>(DwarfRangesSection);
}
MCSection *
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index e5c3732..60914c1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -441,13 +441,9 @@ public:
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
- SelectionDAG &DAG) const;
const char *getTargetNodeName(unsigned Opcode) const override;
- bool isTypeSupportedInIntrinsic(MVT VT) const;
-
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
unsigned Intrinsic) const override;
@@ -459,8 +455,13 @@ public:
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- unsigned getFunctionAlignment(const Function *F) const;
+ bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
+ // Truncating 64-bit to 32-bit is free in SASS.
+ if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
+ return false;
+ return SrcTy->getPrimitiveSizeInBits() == 64 &&
+ DstTy->getPrimitiveSizeInBits() == 32;
+ }
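The rationale: a 64-bit value already occupies a 32-bit register pair in SASS, so narrowing to 32 bits just means reading the low register, and reporting the truncation as free lets the optimizer keep wide values without penalty. A minimal illustration (hypothetical name):

    // The cast lowers to an i64 -> i32 truncate, which this hook reports as
    // costless on NVPTX.
    static unsigned lowWord(unsigned long long x) {
      return static_cast<unsigned>(x);
    }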
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const override {
@@ -515,11 +516,7 @@ public:
private:
const NVPTXSubtarget &STI; // cache the subtarget here
-
- SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
- EVT = MVT::i32) const;
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
- SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 76d6597..9f3cf45 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -37,30 +37,31 @@ void NVPTXInstrInfo::copyPhysReg(
const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
- if (DestRC != SrcRC)
- report_fatal_error("Attempted to created cross-class register copy");
-
- if (DestRC == &NVPTX::Int32RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int1RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Float32RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int16RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Int64RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else if (DestRC == &NVPTX::Float64RegsRegClass)
- BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- else {
+ if (DestRC->getSize() != SrcRC->getSize())
+ report_fatal_error("Copy one register into another with a different width");
+
+ unsigned Op;
+ if (DestRC == &NVPTX::Int1RegsRegClass) {
+ Op = NVPTX::IMOV1rr;
+ } else if (DestRC == &NVPTX::Int16RegsRegClass) {
+ Op = NVPTX::IMOV16rr;
+ } else if (DestRC == &NVPTX::Int32RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr
+ : NVPTX::BITCONVERT_32_F2I);
+ } else if (DestRC == &NVPTX::Int64RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
+ : NVPTX::BITCONVERT_64_F2I);
+ } else if (DestRC == &NVPTX::Float32RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
+ : NVPTX::BITCONVERT_32_I2F);
+ } else if (DestRC == &NVPTX::Float64RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr
+ : NVPTX::BITCONVERT_64_I2F);
+ } else {
llvm_unreachable("Bad register copy");
}
+ BuildMI(MBB, I, DL, get(Op), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
@@ -86,27 +87,6 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
return false;
}
-bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case NVPTX::INT_PTX_SREG_NTID_X:
- case NVPTX::INT_PTX_SREG_NTID_Y:
- case NVPTX::INT_PTX_SREG_NTID_Z:
- case NVPTX::INT_PTX_SREG_TID_X:
- case NVPTX::INT_PTX_SREG_TID_Y:
- case NVPTX::INT_PTX_SREG_TID_Z:
- case NVPTX::INT_PTX_SREG_CTAID_X:
- case NVPTX::INT_PTX_SREG_CTAID_Y:
- case NVPTX::INT_PTX_SREG_CTAID_Z:
- case NVPTX::INT_PTX_SREG_NCTAID_X:
- case NVPTX::INT_PTX_SREG_NCTAID_Y:
- case NVPTX::INT_PTX_SREG_NCTAID_Z:
- case NVPTX::INT_PTX_SREG_WARPSIZE:
- return true;
- }
-}
-
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
unsigned &AddrSpace) const {
bool isLoad = false;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 179c068..3e40722 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -56,7 +56,6 @@ public:
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
- bool isReadSpecialReg(MachineInstr &MI) const;
virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 0bf72fe..f770c2a 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -6,6 +6,8 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
+// \file
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
// the size is large or is not a compile-time constant.
//
@@ -18,19 +20,20 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#define DEBUG_TYPE "nvptx"
using namespace llvm;
namespace {
+
// The actual lowering pass, implemented as a FunctionPass.
struct NVPTXLowerAggrCopies : public FunctionPass {
static char ID;
@@ -50,179 +53,299 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
return "Lower aggregate copies/intrinsics into loops";
}
};
-} // namespace
char NVPTXLowerAggrCopies::ID = 0;
-// Lower MemTransferInst or load-store pair to loop
-static void convertTransferToLoop(
- Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
- bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
- Type *indType = len->getType();
+// Lower memcpy to loop.
+void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
+ bool DstIsVolatile, LLVMContext &Context,
+ Function &F) {
+ Type *TypeOfCopyLen = CopyLen->getType();
- BasicBlock *origBB = splitAt->getParent();
- BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
- BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+ BasicBlock *NewBB =
+ ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
- origBB->getTerminator()->setSuccessor(0, loopBB);
- IRBuilder<> builder(origBB, origBB->getTerminator());
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
- // srcAddr and dstAddr are expected to be pointer types,
+ // SrcAddr and DstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
- srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
- dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS));
+ SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
+ DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
- IRBuilder<> loop(loopBB);
- // The loop index (ind) is a phi node.
- PHINode *ind = loop.CreatePHI(indType, 0);
- // Incoming value for ind is 0
- ind->addIncoming(ConstantInt::get(indType, 0), origBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
- // load from srcAddr+ind
+ // load from SrcAddr+LoopIndex
// TODO: we can leverage the align parameter of llvm.memcpy for more efficient
// word-sized loads and stores.
- Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
- srcVolatile);
- // store at dstAddr+ind
- loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
- dstVolatile);
-
- // The value for ind coming from backedge is (ind + 1)
- Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
- ind->addIncoming(newind, loopBB);
-
- loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+ Value *Element =
+ LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
+ LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
+ SrcIsVolatile);
+ // store at DstAddr+LoopIndex
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
+ DstAddr, LoopIndex),
+ DstIsVolatile);
+
+ // The value for LoopIndex coming from backedge is (LoopIndex + 1)
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
}
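Expressed as C++, the IR built by convertMemCpyToLoop is the byte-wise do/while below (a sketch with made-up names). Like the emitted loadstoreloop it runs the body at least once, so a zero length is assumed never to reach it, and the TODO about the align parameter amounts to widening the element from a byte to a word:

    // Byte-at-a-time copy mirroring the split/loadstoreloop/split structure.
    static void byteCopy(unsigned char *Dst, const unsigned char *Src,
                         unsigned long long Len) {
      unsigned long long I = 0;
      do {
        Dst[I] = Src[I];   // load from Src+I, store to Dst+I
        ++I;
      } while (I < Len);   // backedge condition: NewIndex < CopyLen
    }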
-// Lower MemSetInst to loop
-static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
- Value *len, Value *val, LLVMContext &Context,
- Function &F) {
- BasicBlock *origBB = splitAt->getParent();
- BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split");
- BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB);
+// Lower memmove to IR. memmove is required to correctly copy overlapping memory
+// regions; therefore, it has to check the relative positions of the source and
+// destination pointers and choose the copy direction accordingly.
+//
+// The code below is an IR rendition of this C function:
+//
+// void* memmove(void* dst, const void* src, size_t n) {
+// unsigned char* d = dst;
+// const unsigned char* s = src;
+// if (s < d) {
+// // copy backwards
+// while (n--) {
+// d[n] = s[n];
+// }
+// } else {
+// // copy forward
+// for (size_t i = 0; i < n; ++i) {
+// d[i] = s[i];
+// }
+// }
+// return dst;
+// }
+void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr,
+ Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
+ bool DstIsVolatile, LLVMContext &Context,
+ Function &F) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+
+  // Create a comparison of src and dst, based on which we jump to either
+ // the forward-copy part of the function (if src >= dst) or the backwards-copy
+ // part (if src < dst).
+ // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
+ // structure. Its block terminators (unconditional branches) are replaced by
+ // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT,
+ SrcAddr, DstAddr, "compare_src_dst");
+ TerminatorInst *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm,
+ &ElseTerm);
+
+ // Each part of the function consists of two blocks:
+ // copy_backwards: used to skip the loop when n == 0
+ // copy_backwards_loop: the actual backwards loop BB
+ // copy_forward: used to skip the loop when n == 0
+ // copy_forward_loop: the actual forward loop BB
+ BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
+ CopyBackwardsBB->setName("copy_backwards");
+ BasicBlock *CopyForwardBB = ElseTerm->getParent();
+ CopyForwardBB->setName("copy_forward");
+ BasicBlock *ExitBB = ConvertedInst->getParent();
+ ExitBB->setName("memmove_done");
+
+ // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
+ // between both backwards and forward copy clauses.
+ ICmpInst *CompareN =
+ new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
+ ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
+
+ // Copying backwards.
+ BasicBlock *LoopBB =
+ BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ Value *IndexPtr = LoopBuilder.CreateSub(
+ LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
+ Value *Element = LoopBuilder.CreateLoad(
+ LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
+ ExitBB, LoopBB);
+ LoopPhi->addIncoming(IndexPtr, LoopBB);
+ LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
+ BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
+ ThenTerm->eraseFromParent();
+
+ // Copying forward.
+ BasicBlock *FwdLoopBB =
+ BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB);
+ IRBuilder<> FwdLoopBuilder(FwdLoopBB);
+ PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
+ Value *FwdElement = FwdLoopBuilder.CreateLoad(
+ FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ FwdLoopBuilder.CreateStore(
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
+ FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
+ FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
+ ExitBB, FwdLoopBB);
+ FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
+ FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
+
+ BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
+ ElseTerm->eraseFromParent();
+}
- origBB->getTerminator()->setSuccessor(0, loopBB);
- IRBuilder<> builder(origBB, origBB->getTerminator());
+// Lower memset to loop.
+void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr,
+ Value *CopyLen, Value *SetValue, LLVMContext &Context,
+ Function &F) {
+ BasicBlock *OrigBB = ConvertedInst->getParent();
+ BasicBlock *NewBB =
+ ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
- unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
// Cast pointer to the type of value getting stored
- dstAddr =
- builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
+ unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ DstAddr = Builder.CreateBitCast(DstAddr,
+ PointerType::get(SetValue->getType(), dstAS));
- IRBuilder<> loop(loopBB);
- PHINode *ind = loop.CreatePHI(len->getType(), 0);
- ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
+ LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
- loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
+ LoopBuilder.CreateStore(
+ SetValue,
+ LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
+ false);
- Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
- ind->addIncoming(newind, loopBB);
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
- loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB);
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
}
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
- SmallVector<LoadInst *, 4> aggrLoads;
- SmallVector<MemTransferInst *, 4> aggrMemcpys;
- SmallVector<MemSetInst *, 4> aggrMemsets;
+ SmallVector<LoadInst *, 4> AggrLoads;
+ SmallVector<MemIntrinsic *, 4> MemCalls;
const DataLayout &DL = F.getParent()->getDataLayout();
LLVMContext &Context = F.getParent()->getContext();
- //
- // Collect all the aggrLoads, aggrMemcpys and addrMemsets.
- //
+ // Collect all aggregate loads and mem* calls.
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
++II) {
- if (LoadInst *load = dyn_cast<LoadInst>(II)) {
- if (!load->hasOneUse())
+ if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
+ if (!LI->hasOneUse())
continue;
- if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+ if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
continue;
- User *use = load->user_back();
- if (StoreInst *store = dyn_cast<StoreInst>(use)) {
- if (store->getOperand(0) != load)
+ if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
+ if (SI->getOperand(0) != LI)
continue;
- aggrLoads.push_back(load);
- }
- } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
- Value *len = intr->getLength();
- // If the number of elements being copied is greater
- // than MaxAggrCopySize, lower it to a loop
- if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
- if (len_int->getZExtValue() >= MaxAggrCopySize) {
- aggrMemcpys.push_back(intr);
- }
- } else {
- // turn variable length memcpy/memmov into loop
- aggrMemcpys.push_back(intr);
+ AggrLoads.push_back(LI);
}
- } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
- Value *len = memsetintr->getLength();
- if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
- if (len_int->getZExtValue() >= MaxAggrCopySize) {
- aggrMemsets.push_back(memsetintr);
+ } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
+ // Convert intrinsic calls with variable size or with constant size
+ // larger than the MaxAggrCopySize threshold.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
+ if (LenCI->getZExtValue() >= MaxAggrCopySize) {
+ MemCalls.push_back(IntrCall);
}
} else {
- // turn variable length memset into loop
- aggrMemsets.push_back(memsetintr);
+ MemCalls.push_back(IntrCall);
}
}
}
}
- if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
- (aggrMemsets.size() == 0))
+
+ if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
return false;
+ }
//
// Do the transformation of an aggr load/copy/set to a loop
//
- for (LoadInst *load : aggrLoads) {
- StoreInst *store = dyn_cast<StoreInst>(*load->user_begin());
- Value *srcAddr = load->getOperand(0);
- Value *dstAddr = store->getOperand(1);
- unsigned numLoads = DL.getTypeStoreSize(load->getType());
- Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads);
-
- convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(),
- store->isVolatile(), Context, F);
-
- store->eraseFromParent();
- load->eraseFromParent();
+ for (LoadInst *LI : AggrLoads) {
+ StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
+ Value *SrcAddr = LI->getOperand(0);
+ Value *DstAddr = SI->getOperand(1);
+ unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
+ Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
+
+ convertMemCpyToLoop(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
+
+ SI->eraseFromParent();
+ LI->eraseFromParent();
}
- for (MemTransferInst *cpy : aggrMemcpys) {
- convertTransferToLoop(/* splitAt */ cpy,
- /* srcAddr */ cpy->getSource(),
- /* dstAddr */ cpy->getDest(),
- /* len */ cpy->getLength(),
- /* srcVolatile */ cpy->isVolatile(),
- /* dstVolatile */ cpy->isVolatile(),
+ // Transform mem* intrinsic calls.
+ for (MemIntrinsic *MemCall : MemCalls) {
+ if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
+ convertMemCpyToLoop(/* ConvertedInst */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile(),
/* Context */ Context,
/* Function F */ F);
- cpy->eraseFromParent();
- }
-
- for (MemSetInst *memsetinst : aggrMemsets) {
- Value *len = memsetinst->getLength();
- Value *val = memsetinst->getValue();
- convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context,
- F);
- memsetinst->eraseFromParent();
+ } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
+ convertMemMoveToLoop(/* ConvertedInst */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile(),
+ /* Context */ Context,
+ /* Function F */ F);
+
+ } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
+ convertMemSetToLoop(/* ConvertedInst */ Memset,
+ /* DstAddr */ Memset->getRawDest(),
+ /* CopyLen */ Memset->getLength(),
+ /* SetValue */ Memset->getValue(),
+ /* Context */ Context,
+ /* Function F */ F);
+ }
+ MemCall->eraseFromParent();
}
return true;
}
+} // namespace
+
+namespace llvm {
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
+ "Lower aggregate copies, and llvm.mem* intrinsics into loops",
+ false, false)
+
FunctionPass *llvm::createLowerAggrCopies() {
return new NVPTXLowerAggrCopies();
}
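
For readers skimming the new NVPTXLowerAggrCopies code above: memmove picks a copy direction by comparing the raw src/dst pointers and guards each direction with an "n == 0" check, while memcpy (and the forward arm of memmove) reduces to a single-block element loop. The sketch below is not part of the patch; it rebuilds that forward loop shape in isolation using the same IRBuilder calls, and the helper name emitForwardCopyLoop plus the block names "copy_loop"/"copy_done" are illustrative assumptions.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Copy CopyLen elements from SrcAddr to DstAddr, one element per iteration,
// inserting the loop immediately before InsertBefore (mirrors copy_forward).
static void emitForwardCopyLoop(Instruction *InsertBefore, Value *SrcAddr,
                                Value *DstAddr, Value *CopyLen) {
  BasicBlock *OrigBB = InsertBefore->getParent();
  Function *F = OrigBB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Everything from InsertBefore onward becomes the exit block.
  BasicBlock *ExitBB = OrigBB->splitBasicBlock(InsertBefore, "copy_done");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "copy_loop", F, ExitBB);
  // splitBasicBlock left an unconditional branch; point it at the loop.
  OrigBB->getTerminator()->setSuccessor(0, LoopBB);

  IRBuilder<> B(LoopBB);
  PHINode *Index = B.CreatePHI(CopyLen->getType(), 2, "index");
  Index->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
  Value *Elt = B.CreateLoad(B.CreateInBoundsGEP(SrcAddr, Index), "element");
  B.CreateStore(Elt, B.CreateInBoundsGEP(DstAddr, Index));
  Value *Next = B.CreateAdd(Index, ConstantInt::get(CopyLen->getType(), 1));
  Index->addIncoming(Next, LoopBB);
  // Keep iterating while Next < CopyLen. Note the body always runs at least
  // once, which is why the memmove arms above branch around it when n == 0.
  B.CreateCondBr(B.CreateICmpULT(Next, CopyLen), LoopBB, ExitBB);
}
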
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 93d0025..624052e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -81,7 +81,7 @@ bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
// addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types
- // of instructions this is unecessary and may introduce redudant
+ // of instructions this is unnecessary and may introduce redundant
// address cast.
const auto &AllocaUse = *UI++;
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index b533f31..6656077 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -47,6 +47,36 @@
// ...
// }
//
+// 3. Convert pointers in a byval kernel parameter to pointers in the global
+// address space. As with #2, it allows NVPTX to emit more ld/st.global. E.g.,
+//
+// struct S {
+// int *x;
+// int *y;
+// };
+// __global__ void foo(S s) {
+// int *b = s.y;
+// // use b
+// }
+//
+// "b" points to the global address space. In the IR level,
+//
+// define void @foo({i32*, i32*}* byval %input) {
+// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+// %b = load i32*, i32** %b_ptr
+// ; use %b
+// }
+//
+// becomes
+//
+// define void @foo({i32*, i32*}* byval %input) {
+// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1
+// %b = load i32*, i32** %b_ptr
+// %b_global = addrspacecast i32* %b to i32 addrspace(1)*
+// %b_generic = addrspacecast i32 addrspace(1)* %b_global to i32*
+// ; use %b_generic
+// }
+//
// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes
// don't cancel the addrspacecast pair this pass emits.
//===----------------------------------------------------------------------===//
@@ -54,6 +84,7 @@
#include "NVPTX.h"
#include "NVPTXUtilities.h"
#include "NVPTXTargetMachine.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -71,9 +102,12 @@ class NVPTXLowerKernelArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
// handle byval parameters
- void handleByValParam(Argument *);
- // handle non-byval pointer parameters
- void handlePointerParam(Argument *);
+ void handleByValParam(Argument *Arg);
+ // Knowing Ptr must point to the global address space, this function
+ // addrspacecasts Ptr to global and then back to generic. This allows
+ // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into
+ // loads/stores that appear later.
+ void markPointerAsGlobal(Value *Ptr);
public:
static char ID; // Pass identification, replacement for typeid
@@ -104,7 +138,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
//
// The above code allocates some space in the stack and copies the incoming
// struct from param space to local space.
-// Then replace all occurences of %d by %temp.
+// Then replace all occurrences of %d by %temp.
// =============================================================================
void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
Function *Func = Arg->getParent();
@@ -128,27 +162,33 @@ void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
new StoreInst(LI, AllocA, FirstInst);
}
-void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
- assert(!Arg->hasByValAttr() &&
- "byval params should be handled by handleByValParam");
-
- // Do nothing if the argument already points to the global address space.
- if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
+ if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
return;
- Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
- Instruction *ArgInGlobal = new AddrSpaceCastInst(
- Arg, PointerType::get(Arg->getType()->getPointerElementType(),
+ // Deciding where to emit the addrspacecast pair.
+ BasicBlock::iterator InsertPt;
+ if (Argument *Arg = dyn_cast<Argument>(Ptr)) {
+ // Insert at the function entry if Ptr is an argument.
+ InsertPt = Arg->getParent()->getEntryBlock().begin();
+ } else {
+ // Insert right after Ptr if Ptr is an instruction.
+ InsertPt = ++cast<Instruction>(Ptr)->getIterator();
+ assert(InsertPt != InsertPt->getParent()->end() &&
+ "We don't call this function with Ptr being a terminator.");
+ }
+
+ Instruction *PtrInGlobal = new AddrSpaceCastInst(
+ Ptr, PointerType::get(Ptr->getType()->getPointerElementType(),
ADDRESS_SPACE_GLOBAL),
- Arg->getName(), FirstInst);
- Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
- Arg->getName(), FirstInst);
- // Replace with ArgInGeneric all uses of Args except ArgInGlobal.
- Arg->replaceAllUsesWith(ArgInGeneric);
- ArgInGlobal->setOperand(0, Arg);
+ Ptr->getName(), &*InsertPt);
+ Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(),
+ Ptr->getName(), &*InsertPt);
+ // Replace with PtrInGeneric all uses of Ptr except PtrInGlobal.
+ Ptr->replaceAllUsesWith(PtrInGeneric);
+ PtrInGlobal->setOperand(0, Ptr);
}
-
// =============================================================================
// Main function for this pass.
// =============================================================================
@@ -157,12 +197,32 @@ bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
if (!isKernelFunction(F))
return false;
+ if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
+ // Mark pointers in byval structs as global.
+ for (auto &B : F) {
+ for (auto &I : B) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ if (LI->getType()->isPointerTy()) {
+ Value *UO = GetUnderlyingObject(LI->getPointerOperand(),
+ F.getParent()->getDataLayout());
+ if (Argument *Arg = dyn_cast<Argument>(UO)) {
+ if (Arg->hasByValAttr()) {
+ // LI is a load from a pointer within a byval kernel parameter.
+ markPointerAsGlobal(LI);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
for (Argument &Arg : F.args()) {
if (Arg.getType()->isPointerTy()) {
if (Arg.hasByValAttr())
handleByValParam(&Arg);
else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
- handlePointerParam(&Arg);
+ markPointerAsGlobal(&Arg);
}
}
return true;
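
The pointer-marking idiom added above (markPointerAsGlobal plus the byval-load scan in runOnFunction) boils down to a pair of address-space casts and a use rewrite. Here is the same idiom as a free-standing sketch; the helper name castThroughGlobal and the default AddrSpaceGlobal = 1 (NVPTX's ADDRESS_SPACE_GLOBAL) are assumptions for illustration only.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Rewrite uses of Ptr so they flow through global -> generic addrspacecasts,
// letting a later pass (NVPTXFavorNonGenericAddrSpace) fold the generic cast
// into ld/st.global. InsertPt must come after Ptr's definition.
static void castThroughGlobal(Value *Ptr, Instruction *InsertPt,
                              unsigned AddrSpaceGlobal = 1) {
  Type *EltTy = Ptr->getType()->getPointerElementType();
  Instruction *InGlobal = new AddrSpaceCastInst(
      Ptr, PointerType::get(EltTy, AddrSpaceGlobal), Ptr->getName(), InsertPt);
  Value *InGeneric = new AddrSpaceCastInst(InGlobal, Ptr->getType(),
                                           Ptr->getName(), InsertPt);
  // replaceAllUsesWith also rewrites InGlobal's operand, so restore it.
  Ptr->replaceAllUsesWith(InGeneric);
  InGlobal->setOperand(0, Ptr);
}
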
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index 46b4b33..81a606d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -68,7 +68,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *findAssociatedSection() const override { return nullptr; }
+ MCFragment *findAssociatedFragment() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -110,7 +110,7 @@ public:
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *findAssociatedSection() const override { return nullptr; }
+ MCFragment *findAssociatedFragment() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 5fd69a6..17019d7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -72,7 +72,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().isReturn())
+ if (I->isReturnBlock())
TFI.emitEpilogue(MF, *I);
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
index 0d2627d..45a7309 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSection.h
@@ -19,15 +19,14 @@
#include <vector>
namespace llvm {
-/// NVPTXSection - Represents a section in PTX
-/// PTX does not have sections. We create this class in order to use
-/// the ASMPrint interface.
+/// Represents a section in PTX. PTX does not have sections. We create this class
+/// in order to use the ASMPrint interface.
///
-class NVPTXSection : public MCSection {
+class NVPTXSection final : public MCSection {
virtual void anchor();
public:
NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {}
- virtual ~NVPTXSection() {}
+ ~NVPTXSection() {}
/// Override this as NVPTX has its own way of printing switching
/// to a section.
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 248f9e1..aa931b1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -53,6 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
@@ -64,14 +65,15 @@ extern "C" void LLVMInitializeNVPTXTarget() {
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
- initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry());
- initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
- initializeNVPTXFavorNonGenericAddrSpacesPass(
- *PassRegistry::getPassRegistry());
- initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
- initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry());
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeNVVMReflectPass(PR);
+ initializeGenericToNVVMPass(PR);
+ initializeNVPTXAllocaHoistingPass(PR);
+ initializeNVPTXAssignValidGlobalNamesPass(PR);
+ initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
+ initializeNVPTXLowerKernelArgsPass(PR);
+ initializeNVPTXLowerAllocaPass(PR);
+ initializeNVPTXLowerAggrCopiesPass(PR);
}
static std::string computeDataLayout(bool is64Bit) {
@@ -139,6 +141,10 @@ public:
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
+
+private:
+ // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
+ void addEarlyCSEOrGVNPass();
};
} // end anonymous namespace
@@ -148,11 +154,18 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
});
}
+void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createGVNPass());
+ else
+ addPass(createEarlyCSEPass());
+}
+
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
@@ -161,13 +174,14 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
- disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
+ addPass(createNVVMReflectPass());
addPass(createNVPTXImageOptimizerPass());
- TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+
+ // === Propagate special address spaces ===
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
@@ -178,22 +192,38 @@ void NVPTXPassConfig::addIRPasses() {
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
addPass(createDeadCodeEliminationPass());
+
+ // === Straight-line scalar optimizations ===
addPass(createSeparateConstOffsetFromGEPPass());
+ addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
// SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
// EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
// for some of our benchmarks.
- if (getOptLevel() == CodeGenOpt::Aggressive)
- addPass(createGVNPass());
- else
- addPass(createEarlyCSEPass());
+ addEarlyCSEOrGVNPass();
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
addPass(createNaryReassociatePass());
// NaryReassociate on GEPs creates redundant common expressions, so run
// EarlyCSE after it.
addPass(createEarlyCSEPass());
+
+ // === LSR and other generic IR passes ===
+ TargetPassConfig::addIRPasses();
+ // EarlyCSE is not always strong enough to clean up what LSR produces. For
+ // example, GVN can combine
+ //
+ // %0 = add %a, %b
+ // %1 = add %b, %a
+ //
+ // and
+ //
+ // %0 = shl nsw %a, 2
+ // %1 = shl %a, 2
+ //
+ // but EarlyCSE can do neither of them.
+ addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 5ecdc87..0f88ddf 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -48,8 +48,7 @@ public:
void Initialize(MCContext &ctx, const TargetMachine &TM) override {
TargetLoweringObjectFile::Initialize(ctx, TM);
TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
- DataSection =
- new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+ DataSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getData());
BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
ReadOnlySection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
@@ -84,7 +83,7 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override {
return ReadOnlySection;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index e7250cd..6e679dd 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -89,12 +89,12 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
return false;
}
-unsigned NVPTXTTIImpl::getArithmeticInstrCost(
+int NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 5bcd1e2..0946a32 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -52,7 +52,7 @@ public:
bool isSourceOfDivergence(const Value *V);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 1f178af..578b466 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -335,106 +335,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
return false;
}
-bool llvm::isBarrierIntrinsic(Intrinsic::ID id) {
- if ((id == Intrinsic::nvvm_barrier0) ||
- (id == Intrinsic::nvvm_barrier0_popc) ||
- (id == Intrinsic::nvvm_barrier0_and) ||
- (id == Intrinsic::nvvm_barrier0_or) ||
- (id == Intrinsic::cuda_syncthreads))
- return true;
- return false;
-}
-
-// Interface for checking all memory space transfer related intrinsics
-bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
- if (id == Intrinsic::nvvm_ptr_local_to_gen ||
- id == Intrinsic::nvvm_ptr_shared_to_gen ||
- id == Intrinsic::nvvm_ptr_global_to_gen ||
- id == Intrinsic::nvvm_ptr_constant_to_gen ||
- id == Intrinsic::nvvm_ptr_gen_to_global ||
- id == Intrinsic::nvvm_ptr_gen_to_shared ||
- id == Intrinsic::nvvm_ptr_gen_to_local ||
- id == Intrinsic::nvvm_ptr_gen_to_constant ||
- id == Intrinsic::nvvm_ptr_gen_to_param) {
- return true;
- }
-
- return false;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// provide an option to ignore GEP indicies for find out the base address only
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
- V = V->stripPointerCasts();
- while (true) {
- if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
- if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
- V = IS->getArgOperand(0)->stripPointerCasts();
- continue;
- }
- } else if (ignore_GEP_indices)
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- V = GEP->getPointerOperand()->stripPointerCasts();
- continue;
- }
- break;
- }
- return V;
-}
-
-// consider several special intrinsics in striping pointer casts, and
-// - ignore GEP indicies for find out the base address only, and
-// - tracking PHINode
-// which could be used in simple alias disambigurate.
-const Value *
-llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
- if (processed.find(V) != processed.end())
- return nullptr;
- processed.insert(V);
-
- const Value *V2 = V->stripPointerCasts();
- if (V2 != V && processed.find(V2) != processed.end())
- return nullptr;
- processed.insert(V2);
-
- V = V2;
-
- while (true) {
- if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
- if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) {
- V = IS->getArgOperand(0)->stripPointerCasts();
- continue;
- }
- } else if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- V = GEP->getPointerOperand()->stripPointerCasts();
- continue;
- } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
- if (V != V2 && processed.find(V) != processed.end())
- return nullptr;
- processed.insert(PN);
- const Value *common = nullptr;
- for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
- const Value *pv = PN->getIncomingValue(i);
- const Value *base = skipPointerTransfer(pv, processed);
- if (base) {
- if (!common)
- common = base;
- else if (common != base)
- return PN;
- }
- }
- if (!common)
- return PN;
- V = common;
- }
- break;
- }
- return V;
-}
-
-// The following are some useful utilities for debuggung
+// The following are some useful utilities for debugging
BasicBlock *llvm::getParentBlock(Value *v) {
if (BasicBlock *B = dyn_cast<BasicBlock>(v))
@@ -466,7 +367,7 @@ void llvm::dumpBlock(Value *v, char *blockName) {
return;
for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
- BasicBlock *B = it;
+ BasicBlock *B = &*it;
if (strcmp(B->getName().data(), blockName) == 0) {
B->dump();
return;
@@ -490,7 +391,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {
return nullptr;
}
-// Dump an instruction by nane
+// Dump an instruction by name
void llvm::dumpInst(Value *base, char *instName) {
Instruction *I = getInst(base, instName);
if (I)
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index 7e2ce73..a5262cb 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -61,27 +61,6 @@ bool isKernelFunction(const llvm::Function &);
bool getAlign(const llvm::Function &, unsigned index, unsigned &);
bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
-bool isBarrierIntrinsic(llvm::Intrinsic::ID);
-
-/// make_vector - Helper function which is useful for building temporary vectors
-/// to pass into type construction of CallInst ctors. This turns a null
-/// terminated list of pointers (or other value types) into a real live vector.
-///
-template <typename T> inline std::vector<T> make_vector(T A, ...) {
- va_list Args;
- va_start(Args, A);
- std::vector<T> Result;
- Result.push_back(A);
- while (T Val = va_arg(Args, T))
- Result.push_back(Val);
- va_end(Args);
- return Result;
-}
-
-bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
-const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *
-skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
index a237247..e69bbba 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXVector.td
@@ -26,7 +26,7 @@ let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in {
def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
(ins V2I16Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int16Regs:$dst, (vector_extract
+ [(set Int16Regs:$dst, (extractelt
(v2i16 V2I16Regs:$src), imm:$c))],
IMOV16rr>;
@@ -34,7 +34,7 @@ def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
(ins V4I16Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int16Regs:$dst, (vector_extract
+ [(set Int16Regs:$dst, (extractelt
(v4i16 V4I16Regs:$src), imm:$c))],
IMOV16rr>;
@@ -42,7 +42,7 @@ def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst),
def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
(ins V2I8Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int8Regs:$dst, (vector_extract
+ [(set Int8Regs:$dst, (extractelt
(v2i8 V2I8Regs:$src), imm:$c))],
IMOV8rr>;
@@ -50,7 +50,7 @@ def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
(ins V4I8Regs:$src, i8imm:$c),
"mov.u16 \t$dst, $src${c:vecelem};",
- [(set Int8Regs:$dst, (vector_extract
+ [(set Int8Regs:$dst, (extractelt
(v4i8 V4I8Regs:$src), imm:$c))],
IMOV8rr>;
@@ -58,7 +58,7 @@ def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst),
def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
(ins V2I32Regs:$src, i8imm:$c),
"mov.u32 \t$dst, $src${c:vecelem};",
- [(set Int32Regs:$dst, (vector_extract
+ [(set Int32Regs:$dst, (extractelt
(v2i32 V2I32Regs:$src), imm:$c))],
IMOV32rr>;
@@ -66,7 +66,7 @@ def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
(ins V2F32Regs:$src, i8imm:$c),
"mov.f32 \t$dst, $src${c:vecelem};",
- [(set Float32Regs:$dst, (vector_extract
+ [(set Float32Regs:$dst, (extractelt
(v2f32 V2F32Regs:$src), imm:$c))],
FMOV32rr>;
@@ -74,7 +74,7 @@ def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
(ins V2I64Regs:$src, i8imm:$c),
"mov.u64 \t$dst, $src${c:vecelem};",
- [(set Int64Regs:$dst, (vector_extract
+ [(set Int64Regs:$dst, (extractelt
(v2i64 V2I64Regs:$src), imm:$c))],
IMOV64rr>;
@@ -82,7 +82,7 @@ def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst),
def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
(ins V2F64Regs:$src, i8imm:$c),
"mov.f64 \t$dst, $src${c:vecelem};",
- [(set Float64Regs:$dst, (vector_extract
+ [(set Float64Regs:$dst, (extractelt
(v2f64 V2F64Regs:$src), imm:$c))],
FMOV64rr>;
@@ -90,7 +90,7 @@ def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst),
def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
(ins V4I32Regs:$src, i8imm:$c),
"mov.u32 \t$dst, $src${c:vecelem};",
- [(set Int32Regs:$dst, (vector_extract
+ [(set Int32Regs:$dst, (extractelt
(v4i32 V4I32Regs:$src), imm:$c))],
IMOV32rr>;
@@ -98,7 +98,7 @@ def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst),
def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst),
(ins V4F32Regs:$src, i8imm:$c),
"mov.f32 \t$dst, $src${c:vecelem};",
- [(set Float32Regs:$dst, (vector_extract
+ [(set Float32Regs:$dst, (extractelt
(v4f32 V4F32Regs:$src), imm:$c))],
FMOV32rr>;
}
@@ -110,8 +110,7 @@ def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst),
"mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V2I8Regs:$dst,
- (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))],
- IMOV8rr>;
+ (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
// Insert v4i8
def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
@@ -119,8 +118,7 @@ def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst),
"mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V4I8Regs:$dst,
- (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))],
- IMOV8rr>;
+ (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>;
// Insert v2i16
def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
@@ -128,8 +126,8 @@ def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst),
"mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V2I16Regs:$dst,
- (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))],
- IMOV16rr>;
+ (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
// Insert v4i16
def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
@@ -137,8 +135,8 @@ def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst),
"mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u16 \t$dst${c:vecelem}, $val;",
[(set V4I16Regs:$dst,
- (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))],
- IMOV16rr>;
+ (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))],
+ IMOV16rr>;
// Insert v2i32
def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
@@ -146,8 +144,8 @@ def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst),
"mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u32 \t$dst${c:vecelem}, $val;",
[(set V2I32Regs:$dst,
- (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))],
- IMOV32rr>;
+ (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
// Insert v2f32
def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
@@ -155,8 +153,8 @@ def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst),
"mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f32 \t$dst${c:vecelem}, $val;",
[(set V2F32Regs:$dst,
- (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))],
- FMOV32rr>;
+ (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
// Insert v2i64
def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
@@ -164,8 +162,8 @@ def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst),
"mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u64 \t$dst${c:vecelem}, $val;",
[(set V2I64Regs:$dst,
- (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))],
- IMOV64rr>;
+ (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))],
+ IMOV64rr>;
// Insert v2f64
def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
@@ -173,8 +171,8 @@ def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst),
"mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f64 \t$dst${c:vecelem}, $val;",
[(set V2F64Regs:$dst,
- (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))],
- FMOV64rr>;
+ (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))],
+ FMOV64rr>;
// Insert v4i32
def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
@@ -182,8 +180,8 @@ def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst),
"mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.u32 \t$dst${c:vecelem}, $val;",
[(set V4I32Regs:$dst,
- (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))],
- IMOV32rr>;
+ (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))],
+ IMOV32rr>;
// Insert v4f32
def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
@@ -191,8 +189,8 @@ def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst),
"mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};"
"\n\tmov.f32 \t$dst${c:vecelem}, $val;",
[(set V4F32Regs:$dst,
- (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))],
- FMOV32rr>;
+ (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))],
+ FMOV32rr>;
}
class BinOpAsmString<string c> {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index 5e375b7..20ab5db 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -109,10 +109,10 @@ void NVVMReflect::setVarMap() {
for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
SmallVector<StringRef, 4> NameValList;
- StringRef(ReflectList[i]).split(NameValList, ",");
+ StringRef(ReflectList[i]).split(NameValList, ',');
for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
SmallVector<StringRef, 2> NameValPair;
- NameValList[j].split(NameValPair, "=");
+ NameValList[j].split(NameValPair, '=');
assert(NameValPair.size() == 2 && "name=val expected");
std::stringstream ValStream(NameValPair[1]);
int Val;
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index a699a55..220c70a 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -243,7 +243,6 @@ namespace {
struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
bool IsPPC64;
bool IsDarwin;
@@ -291,9 +290,9 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII,
- const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI), MII(MII) {
+ PPCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI), MII(MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
@@ -1185,7 +1184,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::MFTB: {
- if (STI.getFeatureBits()[PPC::FeatureMFTB]) {
+ if (getSTI().getFeatureBits()[PPC::FeatureMFTB]) {
assert(Inst.getNumOperands() == 2 && "Expecting two operands");
Inst.setOpcode(PPC::MFSPR);
}
@@ -1205,7 +1204,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Post-process instructions (typically extended mnemonics)
ProcessInstruction(Inst, Operands);
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature:
return Error(IDLoc, "instruction use requires an option to be enabled");
@@ -1690,7 +1689,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// where th can be omitted when it is 0. dcbtst is the same. We take the
// server form to be the default, so swap the operands if we're parsing for
// an embedded core (they'll be swapped again upon printing).
- if (STI.getFeatureBits()[PPC::FeatureBookE] &&
+ if (getSTI().getFeatureBits()[PPC::FeatureBookE] &&
Operands.size() == 4 &&
(Name == "dcbt" || Name == "dcbtst")) {
std::swap(Operands[1], Operands[3]);
@@ -1730,10 +1729,19 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
if (getParser().parseExpression(Value))
return false;
- getParser().getStreamer().EmitValue(Value, Size);
+ if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else {
+ getStreamer().EmitValue(Value, Size, ExprLoc);
+ }
if (getLexer().is(AsmToken::EndOfStatement))
break;
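
The new literal handling in ParseDirectiveWord above accepts a constant only if it fits the directive's width as either an unsigned or a signed value; anything else is reported as "literal value out of range for directive". A tiny self-contained illustration of that check, with made-up values (not taken from the patch):

#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

int main() {
  const unsigned Size = 2;               // a two-byte data directive
  assert(isUIntN(8 * Size, 0xFFFFu));    // 65535 fits as unsigned 16-bit
  assert(isIntN(8 * Size, -1));          // -1 fits as signed 16-bit
  // 0x12345 fits neither interpretation, so the parser would reject it.
  assert(!isUIntN(8 * Size, 0x12345) && !isIntN(8 * Size, 0x12345));
  return 0;
}
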
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 93a503c..1fc84fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -401,8 +401,6 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
return result;
-
- MI.clear();
}
return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8e18783..53eb727 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -18,8 +18,6 @@
namespace llvm {
-class MCOperand;
-
class PPCInstPrinter : public MCInstPrinter {
bool IsDarwin;
public:
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 992be5b..dd99495 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -113,6 +113,10 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
break;
}
break;
+ case PPC::fixup_ppc_half16ds:
+ Target.print(errs());
+ errs() << '\n';
+ report_fatal_error("Invalid PC-relative half16ds relocation");
case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
@@ -305,13 +309,13 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
break;
case MCSymbolRefExpr::VK_GOT:
Type = ELF::R_PPC64_GOT16_DS;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_GOT_LO:
Type = ELF::R_PPC64_GOT16_LO_DS;
break;
case MCSymbolRefExpr::VK_PPC_TOC:
Type = ELF::R_PPC64_TOC16_DS;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_TOC_LO:
Type = ELF::R_PPC64_TOC16_LO_DS;
break;
@@ -372,16 +376,16 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC64_ADDR64;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_DTPMOD:
Type = ELF::R_PPC64_DTPMOD64;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_TPREL:
Type = ELF::R_PPC64_TPREL64;
- break;
+ break;
case MCSymbolRefExpr::VK_PPC_DTPREL:
Type = ELF::R_PPC64_DTPREL64;
- break;
+ break;
}
break;
case FK_Data_4:
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 86ad385..e252ac9 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -20,18 +20,19 @@
namespace llvm {
class Triple;
- class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
- public:
- explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
- };
-
- class PPCELFMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&);
- };
+class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &);
+};
+
+class PPCELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &);
+};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index a641780..d42a111 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -82,8 +82,8 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
// There are no TLS PPCMCExprs at the moment.
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index 9d72896..b54a0e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -241,12 +241,12 @@ bool PPCMachObjectWriter::recordScatteredRelocation(
if (FixupOffset > 0xffffff) {
char Buffer[32];
format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
Twine("Section too large, can't encode "
"r_address (") +
Buffer + ") into 24 bits of scattered "
"relocation entry.");
- llvm_unreachable("fatal error returned?!");
+ return false;
}
// Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 6075631..acea600 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -56,6 +56,14 @@ namespace PPC {
PRED_BIT_UNSET = 1025
};
+ // Bit for branch taken (plus) or not-taken (minus) hint
+ enum BranchHintBit {
+ BR_NO_HINT = 0x0,
+ BR_NONTAKEN_HINT = 0x2,
+ BR_TAKEN_HINT = 0x3,
+ BR_HINT_MASK = 0X3
+ };
+
/// Invert the specified predicate. != -> ==, < -> >=.
Predicate InvertPredicate(Predicate Opcode);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index ae8d8b4..a259ed3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -41,13 +41,16 @@ namespace llvm {
FunctionPass *createPPCVSXCopyPass();
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCVSXSwapRemovalPass();
+ FunctionPass *createPPCMIPeepholePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
+ FunctionPass *createPPCBoolRetToIntPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
void initializePPCVSXFMAMutatePass(PassRegistry&);
+ void initializePPCBoolRetToIntPass(PassRegistry&);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 641b237..b03be12 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -50,6 +50,8 @@ def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
+def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software emulation for floating point">;
def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
"Enable 64-bit registers usage for ppc32 [beta]">;
def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
@@ -137,6 +139,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
+def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true",
+ "Target supports add/load integer fusion.">;
+def FeatureFloat128 :
+ SubtargetFeature<"float128", "HasFloat128", "true",
+ "Enable the __float128 data type for IEEE-754R Binary128.",
+ [FeatureVSX]>;
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
@@ -168,7 +176,8 @@ def ProcessorFeatures {
FeatureMFTB, DeprecatedDST];
list<SubtargetFeature> Power8SpecificFeatures =
[DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
- FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
+ FeatureFusion];
list<SubtargetFeature> Power8FeatureList =
!listconcat(Power7FeatureList, Power8SpecificFeatures);
}
@@ -309,7 +318,7 @@ def : ProcessorModel<"g5", G5Model,
Feature64Bit /*, Feature64BitRegs */,
FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
- [DirectiveE500mc, FeatureMFOCRF,
+ [DirectiveE500mc,
FeatureSTFIWX, FeatureICBT, FeatureBookE,
FeatureISEL, FeatureMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
@@ -403,6 +412,7 @@ def PPCAsmParserVariant : AsmParserVariant {
// InstAlias definitions use immediate literals. Set RegisterPrefix
// so that those are not misinterpreted as registers.
string RegisterPrefix = "%";
+ string BreakCharacters = ".";
}
def PPC : Target {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 8e118ec..9a63c14 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -65,19 +65,20 @@ using namespace llvm;
#define DEBUG_TYPE "asmprinter"
namespace {
- class PPCAsmPrinter : public AsmPrinter {
- protected:
- MapVector<MCSymbol*, MCSymbol*> TOC;
- const PPCSubtarget *Subtarget;
- StackMaps SM;
- public:
- explicit PPCAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
-
- const char *getPassName() const override {
- return "PowerPC Assembly Printer";
- }
+class PPCAsmPrinter : public AsmPrinter {
+protected:
+ MapVector<MCSymbol *, MCSymbol *> TOC;
+ const PPCSubtarget *Subtarget;
+ StackMaps SM;
+
+public:
+ explicit PPCAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
+
+ const char *getPassName() const override {
+ return "PowerPC Assembly Printer";
+ }
MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
@@ -94,10 +95,8 @@ namespace {
void EmitEndOfAsmFile(Module &M) override;
- void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI);
- void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI);
+ void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
+ void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
@@ -157,15 +156,15 @@ static const char *stripRegisterPrefix(const char *RegName) {
return RegName + 1;
case 'c': if (RegName[1] == 'r') return RegName + 2;
}
-
+
return RegName;
}
void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const MachineOperand &MO = MI->getOperand(OpNo);
-
+
switch (MO.getType()) {
case MachineOperand::MO_Register: {
const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
@@ -184,8 +183,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
MO.getMBB()->getSymbol()->print(O, MAI);
return;
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
- << '_' << MO.getIndex();
+ O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
return;
case MachineOperand::MO_BlockAddress:
GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
@@ -200,19 +199,19 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
!GV->isStrongDefinitionForLinker()) {
if (!GV->hasHiddenVisibility()) {
SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>()
- .getGVStubEntry(SymToPrint);
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(
+ SymToPrint);
if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMI->getObjFileInfo<MachineModuleInfoMachO>().
- getHiddenGVStubEntry(SymToPrint);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(
+ SymToPrint);
if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
@@ -295,16 +294,16 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
}
case 'U': // Print 'u' for update form.
case 'X': // Print 'x' for indexed form.
- {
- // FIXME: Currently for PowerPC memory operands are always loaded
- // into a register, so we never get an update or indexed form.
- // This is bad even for offset forms, since even if we know we
- // have a value in -16(r1), we will generate a load into r<n>
- // and then load from 0(r<n>). Until that issue is fixed,
- // tolerate 'U' and 'X' but don't output anything.
- assert(MI->getOperand(OpNo).isReg());
- return false;
- }
+ {
+ // FIXME: Currently for PowerPC memory operands are always loaded
+ // into a register, so we never get an update or indexed form.
+ // This is bad even for offset forms, since even if we know we
+ // have a value in -16(r1), we will generate a load into r<n>
+ // and then load from 0(r<n>). Until that issue is fixed,
+ // tolerate 'U' and 'X' but don't output anything.
+ assert(MI->getOperand(OpNo).isReg());
+ return false;
+ }
}
}
@@ -315,7 +314,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
-
/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
@@ -330,8 +328,7 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) {
SM.serializeToStackMapSection();
}
-void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI) {
+void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) {
unsigned NumNOPBytes = MI.getOperand(1).getImm();
SM.recordStackMap(MI);
@@ -353,13 +350,12 @@ void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
// Emit nops.
for (unsigned i = 0; i < NumNOPBytes; i += 4)
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
}
// Lower a patchpoint of the form:
// [<def>], <id>, <numBytes>, <target>, <numArgs>
-void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI) {
+void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
SM.recordPatchPoint(MI);
PatchPointOpers Opers(&MI);
@@ -375,60 +371,59 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));
// Save the current TOC pointer before the remote call.
int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
-
// If we're on ELFv1, then we need to load the actual function pointer
// from the function descriptor.
if (!Subtarget->isELFv2ABI()) {
- // Load the new TOC pointer and the function address, but not r11
- // (needing this is rare, and loading it here would prevent passing it
- // via a 'nest' parameter.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ // Load the new TOC pointer and the function address, but not r11
+ // (needing this is rare, and loading it here would prevent passing it
+      // via a 'nest' parameter.)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(8)
.addReg(ScratchReg));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
.addReg(ScratchReg)
.addImm(0)
.addReg(ScratchReg));
++EncodedBytes;
}
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR8)
.addReg(ScratchReg));
++EncodedBytes;
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTRL8));
++EncodedBytes;
// Restore the TOC pointer after the call.
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
@@ -439,7 +434,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext);
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL8_NOP)
.addExpr(SymVar));
EncodedBytes += 2;
}
@@ -454,7 +449,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
assert((NumBytes - EncodedBytes) % 4 == 0 &&
"Invalid number of NOP bytes requested!");
for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
}
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
@@ -499,16 +494,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
bool isDarwin = TM.getTargetTriple().isOSDarwin();
const Module *M = MF->getFunction()->getParent();
PICLevel::Level PL = M->getPICLevel();
-
+
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(*OutStreamer, SM, *MI);
+ return LowerSTACKMAP(SM, *MI);
case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(*OutStreamer, SM, *MI);
+ return LowerPATCHPOINT(SM, *MI);
case PPC::MoveGOTtoLR: {
// Transform %LR = MoveGOTtoLR
@@ -533,17 +528,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8: {
// Transform %LR = MovePCtoLR
- // Into this, where the label is the PIC base:
+ // Into this, where the label is the PIC base:
// bl L1$pb
// L1$pb:
MCSymbol *PICBase = MF->getPICBaseSymbol();
-
+
// Emit the 'bl'.
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL)
- // FIXME: We would like an efficient form for this, so we don't have to do
- // a lot of extra uniquing.
- .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
-
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we
+ // don't have to do a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+
// Emit the label.
OutStreamer->EmitLabel(PICBase);
return;
@@ -654,7 +650,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
-
+
case PPC::ADDIStocHA: {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin);
@@ -669,28 +665,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MO.isBlockAddress()) &&
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
- bool IsExternal = false;
- bool IsNonLocalFunction = false;
- bool IsCommon = false;
- bool IsAvailExt = false;
+ bool GlobalToc = false;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
MOSymbol = getSymbol(GV);
- IsExternal = GV->isDeclaration();
- IsCommon = GV->hasCommonLinkage();
- IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- !GV->isStrongDefinitionForLinker();
- IsAvailExt = GV->hasAvailableExternallyLinkage();
- } else if (MO.isCPI())
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG);
+ } else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
- else if (MO.isJTI())
+ } else if (MO.isJTI()) {
MOSymbol = GetJTISymbol(MO.getIndex());
- else if (MO.isBlockAddress())
+ } else if (MO.isBlockAddress()) {
MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress());
+ }
- if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
- MO.isJTI() || MO.isBlockAddress() ||
+ if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -727,13 +717,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
else if (MO.isGlobal()) {
- const GlobalValue *GValue = MO.getGlobal();
- MOSymbol = getSymbol(GValue);
- if (GValue->getType()->getElementType()->isFunctionTy() ||
- GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage() ||
- TM.getCodeModel() == CodeModel::Large)
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ DEBUG(
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ assert((GVFlags & PPCII::MO_NLP_FLAG) &&
+ "LDtocL used on symbol that could be accessed directly is "
+ "invalid. Must match ADDIStocHA."));
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
const MCExpr *Exp =
@@ -754,21 +745,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
MCSymbol *MOSymbol = nullptr;
- bool IsExternal = false;
- bool IsNonLocalFunction = false;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
+ DEBUG(
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
+ assert (
+ !(GVFlags & PPCII::MO_NLP_FLAG) &&
+ "Interposable definitions must use indirect access."));
MOSymbol = getSymbol(GV);
- IsExternal = GV->isDeclaration();
- IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
- !GV->isStrongDefinitionForLinker();
- } else if (MO.isCPI())
+ } else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
-
- if (IsNonLocalFunction || IsExternal ||
- TM.getCodeModel() == CodeModel::Large)
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
const MCExpr *Exp =
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
@@ -840,13 +828,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
case PPC::PPC32GOT: {
- MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
- const MCExpr *SymGotTlsL =
- MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
- OutContext);
- const MCExpr *SymGotTlsHA =
- MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
- OutContext);
+ MCSymbol *GOTSymbol =
+ OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *SymGotTlsL = MCSymbolRefExpr::create(
+ GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext);
+ const MCExpr *SymGotTlsHA = MCSymbolRefExpr::create(
+ GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext);
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI)
.addReg(MI->getOperand(0).getReg())
.addExpr(SymGotTlsL));
@@ -1079,14 +1066,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
// linux/ppc32 - Normal entry label.
- if (!Subtarget->isPPC64() &&
- (TM.getRelocationModel() != Reloc::PIC_ ||
+ if (!Subtarget->isPPC64() &&
+ (TM.getRelocationModel() != Reloc::PIC_ ||
MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small))
return AsmPrinter::EmitFunctionEntryLabel();
if (!Subtarget->isPPC64()) {
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
- if (PPCFI->usesPICBase()) {
+ if (PPCFI->usesPICBase()) {
MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
MCSymbol *PICBase = MF->getPICBaseSymbol();
OutStreamer->EmitLabel(RelocSymbol);
@@ -1130,11 +1117,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer->SwitchSection(Current.first, Current.second);
}
-
bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
- bool isPPC64 = TD->getPointerSizeInBits() == 64;
+ bool isPPC64 = DL.getPointerSizeInBits() == 64;
PPCTargetStreamer &TS =
static_cast<PPCTargetStreamer &>(*OutStreamer->getTargetStreamer());
@@ -1293,8 +1279,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection());
if (TM.getRelocationModel() == Reloc::PIC_) {
OutStreamer->SwitchSection(
@@ -1325,7 +1311,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
// Construct a local MCSubtargetInfo and shadow EmitToStreamer here.
// This is because the MachineFunction won't exist (but have not yet been
@@ -1338,8 +1324,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
S.EmitInstruction(Inst, *STI);
};
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
// .lazy_symbol_pointer
MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
@@ -1353,12 +1339,12 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
OutStreamer->SwitchSection(StubSection);
EmitAlignment(4);
-
+
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
-
+
OutStreamer->EmitLabel(Stub);
OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
@@ -1463,20 +1449,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4);
}
}
-
+
OutStreamer->AddBlankLine();
}
-
bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
- bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64;
// Darwin/PPC always uses mach-o.
- const TargetLoweringObjectFileMachO &TLOFMacho =
- static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
MachineModuleInfoMachO &MMIMacho =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
if (!Stubs.empty())
EmitFunctionStubs(Stubs);
@@ -1484,27 +1469,27 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (MAI->doesSupportExceptionHandling() && MMI) {
// Add the (possibly multiple) personalities to the set of global values.
// Only referenced functions get into the Personalities list.
- const std::vector<const Function*> &Personalities = MMI->getPersonalities();
- for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
- E = Personalities.end(); I != E; ++I) {
- if (*I) {
- MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ for (const Function *Personality : MMI->getPersonalities()) {
+ if (Personality) {
+ MCSymbol *NLPSym =
+ getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
- MMIMacho.getGVStubEntry(NLPSym);
- StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym =
+ MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
}
}
}
// Output stubs for dynamically-linked functions.
Stubs = MMIMacho.GetGVStubList();
-
+
// Output macho stubs for external and common global variables.
if (!Stubs.empty()) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(isPPC64 ? 3 : 2);
-
+
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer->EmitLabel(Stubs[i].first);
@@ -1535,7 +1520,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
if (!Stubs.empty()) {
OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
EmitAlignment(isPPC64 ? 3 : 2);
-
+
for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
// L_foo$stub:
OutStreamer->EmitLabel(Stubs[i].first);
@@ -1573,7 +1558,7 @@ createPPCAsmPrinterPass(TargetMachine &tm,
}
// Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() {
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
new file mode 100644
index 0000000..7920240
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -0,0 +1,253 @@
+//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting i1 values to i32 if they could be more
+// profitably allocated as GPRs rather than CRs. This pass will become totally
+// unnecessary if Register Bank Allocation and Global Instruction Selection ever
+// go upstream.
+//
+// Presently, the pass converts i1 Constants and Arguments to i32 if the
+// transitive closure of their uses includes only PHINodes, CallInsts, and
+// ReturnInsts. The rationale is that arguments are generally passed and returned
+// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
+// actually save casts at the Machine Instruction level.
+//
+// It might be useful to expand this pass to add bit-wise operations to the list
+// of safe transitive closure types. Also, we miss some opportunities when LLVM
+// represents logical AND and OR operations with control flow rather than data
+// flow. For example, by lowering the expression: return (A && B && C)
+//
+// as: return A ? (B && C) : false.
+//
+// There's code in SimplifyCFG that could be used to turn control flow into data
+// flow using SelectInsts. Selects are slow on some architectures (P7/P8), so
+// this probably isn't good in general, but for the special case of i1, the
+// Selects could be further lowered to bit operations that are fast everywhere.
+//
+//===----------------------------------------------------------------------===//
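// A minimal standalone sketch (not part of this patch) showing the zext/trunc
// shape the pass described above introduces around an i1 value: the boolean is
// widened to i32 at its def and truncated back to i1 just before its use (the
// real pass only fires when an Instruction such as a PHINode is involved).
// Built with the LLVM C++ API; the module and value names are illustrative.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("boolrettoint-demo", Ctx);
  Type *Int1Ty = Type::getInt1Ty(Ctx);
  // A toy i1-returning function used purely to host the pattern.
  FunctionType *FTy = FunctionType::get(Int1Ty, {Int1Ty}, false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  Value *Arg = &*F->arg_begin();
  // The i1 value is widened to i32 at its def and truncated back right before
  // the ReturnInst, so it can live in a GPR rather than a CR.
  Value *Wide = B.CreateZExt(Arg, B.getInt32Ty(), "wide");
  Value *Back = B.CreateTrunc(Wide, Int1Ty, "backToBool");
  B.CreateRet(Back);
  M.print(outs(), nullptr);
  return 0;
}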
+
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+#define DEBUG_TYPE "bool-ret-to-int"
+
+STATISTIC(NumBoolRetPromotion,
+ "Number of times a bool feeding a RetInst was promoted to an int");
+STATISTIC(NumBoolCallPromotion,
+ "Number of times a bool feeding a CallInst was promoted to an int");
+STATISTIC(NumBoolToIntPromotion,
+ "Total number of times a bool was promoted to an int");
+
+class PPCBoolRetToInt : public FunctionPass {
+
+ static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
+ SmallPtrSet<Value *, 8> Defs;
+ SmallVector<Value *, 8> WorkList;
+ WorkList.push_back(V);
+ Defs.insert(V);
+ while (!WorkList.empty()) {
+ Value *Curr = WorkList.back();
+ WorkList.pop_back();
+ if (User *CurrUser = dyn_cast<User>(Curr))
+ for (auto &Op : CurrUser->operands())
+ if (Defs.insert(Op).second)
+ WorkList.push_back(Op);
+ }
+ return Defs;
+ }
+
+  // Translate an i1 value to an equivalent i32 value:
+ static Value *translate(Value *V) {
+ Type *Int32Ty = Type::getInt32Ty(V->getContext());
+ if (Constant *C = dyn_cast<Constant>(V))
+ return ConstantExpr::getZExt(C, Int32Ty);
+ if (PHINode *P = dyn_cast<PHINode>(V)) {
+ // Temporarily set the operands to 0. We'll fix this later in
+ // runOnUse.
+ Value *Zero = Constant::getNullValue(Int32Ty);
+ PHINode *Q =
+ PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+ for (unsigned i = 0; i < P->getNumOperands(); ++i)
+ Q->addIncoming(Zero, P->getIncomingBlock(i));
+ return Q;
+ }
+
+ Argument *A = dyn_cast<Argument>(V);
+ Instruction *I = dyn_cast<Instruction>(V);
+ assert((A || I) && "Unknown value type");
+
+ auto InstPt =
+ A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
+ return new ZExtInst(V, Int32Ty, "", InstPt);
+ }
+
+ typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
+
+ // A PHINode is Promotable if:
+ // 1. Its type is i1 AND
+  // 2. All of its uses are ReturnInst, CallInst, PHINode, or DbgInfoIntrinsic
+ // AND
+ // 3. All of its operands are Constant or Argument or
+ // CallInst or PHINode AND
+ // 4. All of its PHINode uses are Promotable AND
+ // 5. All of its PHINode operands are Promotable
+ static PHINodeSet getPromotablePHINodes(const Function &F) {
+ PHINodeSet Promotable;
+ // Condition 1
+ for (auto &BB : F)
+ for (auto &I : BB)
+ if (const PHINode *P = dyn_cast<PHINode>(&I))
+ if (P->getType()->isIntegerTy(1))
+ Promotable.insert(P);
+
+ SmallVector<const PHINode *, 8> ToRemove;
+ for (const auto &P : Promotable) {
+ // Condition 2 and 3
+ auto IsValidUser = [] (const Value *V) -> bool {
+ return isa<ReturnInst>(V) || isa<CallInst>(V) || isa<PHINode>(V) ||
+ isa<DbgInfoIntrinsic>(V);
+ };
+ auto IsValidOperand = [] (const Value *V) -> bool {
+ return isa<Constant>(V) || isa<Argument>(V) || isa<CallInst>(V) ||
+ isa<PHINode>(V);
+ };
+ const auto &Users = P->users();
+ const auto &Operands = P->operands();
+ if (!std::all_of(Users.begin(), Users.end(), IsValidUser) ||
+ !std::all_of(Operands.begin(), Operands.end(), IsValidOperand))
+ ToRemove.push_back(P);
+ }
+
+ // Iterate to convergence
+ auto IsPromotable = [&Promotable] (const Value *V) -> bool {
+ const PHINode *Phi = dyn_cast<PHINode>(V);
+ return !Phi || Promotable.count(Phi);
+ };
+ while (!ToRemove.empty()) {
+ for (auto &User : ToRemove)
+ Promotable.erase(User);
+ ToRemove.clear();
+
+ for (const auto &P : Promotable) {
+ // Condition 4 and 5
+ const auto &Users = P->users();
+ const auto &Operands = P->operands();
+ if (!std::all_of(Users.begin(), Users.end(), IsPromotable) ||
+ !std::all_of(Operands.begin(), Operands.end(), IsPromotable))
+ ToRemove.push_back(P);
+ }
+ }
+
+ return Promotable;
+ }
+
+ typedef DenseMap<Value *, Value *> B2IMap;
+
+ public:
+ static char ID;
+ PPCBoolRetToInt() : FunctionPass(ID) {
+ initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) {
+ PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
+ B2IMap Bool2IntMap;
+ bool Changed = false;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
+ if (F.getReturnType()->isIntegerTy(1))
+ Changed |=
+ runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap);
+
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ for (auto &U : CI->operands())
+ if (U->getType()->isIntegerTy(1))
+ Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap);
+ }
+ }
+
+ return Changed;
+ }
+
+ static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+ B2IMap &BoolToIntMap) {
+ auto Defs = findAllDefs(U);
+
+ // If the values are all Constants or Arguments, don't bother
+ if (!std::any_of(Defs.begin(), Defs.end(), isa<Instruction, Value *>))
+ return false;
+
+ // Presently, we only know how to handle PHINode, Constant, and Arguments.
+ // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension
+ // could also be handled in the future.
+ for (const auto &V : Defs)
+ if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V))
+ return false;
+
+ for (const auto &V : Defs)
+ if (const PHINode *P = dyn_cast<PHINode>(V))
+ if (!PromotablePHINodes.count(P))
+ return false;
+
+ if (isa<ReturnInst>(U.getUser()))
+ ++NumBoolRetPromotion;
+ if (isa<CallInst>(U.getUser()))
+ ++NumBoolCallPromotion;
+ ++NumBoolToIntPromotion;
+
+ for (const auto &V : Defs)
+ if (!BoolToIntMap.count(V))
+ BoolToIntMap[V] = translate(V);
+
+    // Replace the operands of the translated instructions. They were set to
+    // zero in the translate function.
+ for (auto &Pair : BoolToIntMap) {
+ User *First = dyn_cast<User>(Pair.first);
+ User *Second = dyn_cast<User>(Pair.second);
+ assert((!First || Second) && "translated from user to non-user!?");
+ if (First)
+ for (unsigned i = 0; i < First->getNumOperands(); ++i)
+ Second->setOperand(i, BoolToIntMap[First->getOperand(i)]);
+ }
+
+ Value *IntRetVal = BoolToIntMap[U];
+ Type *Int1Ty = Type::getInt1Ty(U->getContext());
+ Instruction *I = cast<Instruction>(U.getUser());
+ Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I);
+ U.set(BackToBool);
+
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char PPCBoolRetToInt::ID = 0;
+INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
+ "Convert i1 constants to i32 if they are returned",
+ false, false)
+
+FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index 940d55a..73a5305 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -91,7 +91,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
unsigned FuncSize = 0;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
- MachineBasicBlock *MBB = MFI;
+ MachineBasicBlock *MBB = &*MFI;
// The end of the previous block may have extra nops if this block has an
// alignment requirement.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index fd150be..b6ac4d5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -98,7 +98,7 @@ namespace {
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
private:
@@ -112,6 +112,7 @@ namespace {
const DataLayout *DL;
DominatorTree *DT;
const TargetLibraryInfo *LibInfo;
+ bool PreserveLCSSA;
};
char PPCCTRLoops::ID = 0;
@@ -147,7 +148,7 @@ INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
@@ -169,11 +170,12 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
bool MadeChange = false;
@@ -250,8 +252,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr
// we're definitely using CTR.
case Intrinsic::ppc_is_decremented_ctr_nonzero:
- case Intrinsic::ppc_mtctr:
- return true;
+ case Intrinsic::ppc_mtctr:
+ return true;
// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
@@ -369,7 +371,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
true);
if (VTy == MVT::Other)
return true;
-
+
if (TLI->isOperationLegalOrCustom(Opcode, VTy))
continue;
else if (VTy.isVector() &&
@@ -537,7 +539,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
// the CTR register because some such uses might be reordered by the
// selection DAG after the mtctr instruction).
if (!Preheader || mightUseCTR(TT, Preheader))
- Preheader = InsertPreheaderForLoop(L, this);
+ Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (!Preheader)
return MadeChange;
@@ -554,10 +556,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
if (!ExitCount->getType()->isPointerTy() &&
ExitCount->getType() != CountType)
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
- ExitCount = SE->getAddExpr(ExitCount,
- SE->getConstant(CountType, 1));
- Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
- Preheader->getTerminator());
+ ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType));
+ Value *ECValue =
+ SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator());
IRBuilder<> CountBuilder(Preheader->getTerminator());
Module *M = Preheader->getParent()->getParent();
@@ -677,7 +678,7 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
// any other instructions that might clobber the ctr register.
for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
I != IE; ++I) {
- MachineBasicBlock *MBB = I;
+ MachineBasicBlock *MBB = &*I;
if (!MDT->isReachableFromEntry(MBB))
continue;
@@ -694,4 +695,3 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
return false;
}
#endif // NDEBUG
-
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index fc89753..7cb1bb5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -71,15 +71,20 @@ protected:
for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
bool OtherReference = false, BlockChanged = false;
+
+ if ((*PI)->empty())
+ continue;
+
for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
- MachineInstrBuilder MIB;
+ if (J == (*PI)->end())
+ break;
+
if (J->getOpcode() == PPC::B) {
if (J->getOperand(0).getMBB() == &ReturnMBB) {
// This is an unconditional branch to the return. Replace the
// branch with a blr.
- MIB =
- BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()));
- MIB.copyImplicitOps(I);
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode()))
+ .copyImplicitOps(I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -90,10 +95,10 @@ protected:
if (J->getOperand(2).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- MIB = BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
- .addImm(J->getOperand(0).getImm())
- .addReg(J->getOperand(1).getReg());
- MIB.copyImplicitOps(I);
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ .addImm(J->getOperand(0).getImm())
+ .addReg(J->getOperand(1).getReg())
+ .copyImplicitOps(I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -104,11 +109,11 @@ protected:
if (J->getOperand(1).getMBB() == &ReturnMBB) {
// This is a conditional branch to the return. Replace the branch
// with a bclr.
- MIB = BuildMI(**PI, J, J->getDebugLoc(),
- TII->get(J->getOpcode() == PPC::BC ?
- PPC::BCLR : PPC::BCLRn))
- .addReg(J->getOperand(0).getReg());
- MIB.copyImplicitOps(I);
+ BuildMI(
+ **PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg())
+ .copyImplicitOps(I);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
BlockChanged = true;
@@ -146,7 +151,7 @@ protected:
}
for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
- PredToRemove[i]->removeSuccessor(&ReturnMBB);
+ PredToRemove[i]->removeSuccessor(&ReturnMBB, true);
if (Changed && !ReturnMBB.hasAddressTaken()) {
// We now might be able to merge this blr-only block into its
@@ -156,7 +161,7 @@ protected:
if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
// Move the blr into the preceding block.
PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
- PrevMBB.removeSuccessor(&ReturnMBB);
+ PrevMBB.removeSuccessor(&ReturnMBB, true);
}
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 5f236f7..b451ebf 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -164,7 +164,8 @@ class PPCFastISel final : public FastISel {
unsigned DestReg, bool IsZExt);
unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
- unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true);
+ unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
+ bool UseSExt = true);
unsigned PPCMaterialize32BitInt(int64_t Imm,
const TargetRegisterClass *RC);
unsigned PPCMaterialize64BitInt(int64_t Imm,
@@ -292,10 +293,7 @@ bool PPCFastISel::isValueAvailable(const Value *V) const {
return true;
const auto *I = cast<Instruction>(V);
- if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB)
- return true;
-
- return false;
+ return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}
// Given a value Obj, create an Address object Addr that represents its
@@ -527,9 +525,9 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// VSX only provides an indexed load.
if (Is32VSXLoad || Is64VSXLoad) return false;
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
+ Addr.Offset),
MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
@@ -660,9 +658,9 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
// VSX only provides an indexed store.
if (Is32VSXStore || Is64VSXStore) return false;
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
+ Addr.Offset),
MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
MFI.getObjectAlignment(Addr.Base.FI));
@@ -774,8 +772,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) {
BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
.addImm(PPCPred).addReg(CondReg).addMBB(TBB);
- fastEmitBranch(FBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
return true;
}
} else if (const ConstantInt *CI =
@@ -1607,21 +1604,18 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (ValLocs.size() > 1)
return false;
- // Special case for returning a constant integer of any size.
- // Materialize the constant as an i64 and copy it to the return
- // register. We still need to worry about properly extending the sign. E.g:
- // If the constant has only one bit, it means it is a boolean. Therefore
- // we can't use PPCMaterializeInt because it extends the sign which will
- // cause negations of the returned value to be incorrect as they are
- // implemented as the flip of the least significant bit.
- if (isa<ConstantInt>(*RV)) {
- const Constant *C = cast<Constant>(RV);
-
+ // Special case for returning a constant integer of any size - materialize
+ // the constant as an i64 and copy it to the return register.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
CCValAssign &VA = ValLocs[0];
unsigned RetReg = VA.getLocReg();
- unsigned SrcReg = PPCMaterializeInt(C, MVT::i64,
- VA.getLocInfo() == CCValAssign::SExt);
+ // We still need to worry about properly extending the sign. For example,
+ // we could have only a single bit or a constant that needs zero
+ // extension rather than sign extension. Make sure we pass the return
+ // value extension property to integer materialization.
+ unsigned SrcReg =
+ PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
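// Not part of the patch: a tiny standalone illustration of why the extension
// kind matters when a returned i1 is materialized as an i64, as the comment
// above explains. Sign- and zero-extending the single-bit value "true" give
// different integers.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Bit = 1;                      // an i1 "true" held in the low bit
  int64_t SExt = -(int64_t)(Bit & 1);    // sign extension of a 1-bit value: -1
  int64_t ZExt = (int64_t)(Bit & 1);     // zero extension: 1
  std::printf("sext(true) = %lld, zext(true) = %lld\n",
              (long long)SExt, (long long)ZExt);
  return 0;
}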
@@ -1761,8 +1755,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
const IndirectBrInst *IB = cast<IndirectBrInst>(I);
- for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
- FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+ for (const BasicBlock *SuccBB : IB->successors())
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
return true;
}
@@ -1898,10 +1892,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
CodeModel::Model CModel = TM.getCodeModel();
- MachineMemOperand *MMO =
- FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- (VT == MVT::f32) ? 4 : 8, Align);
+ MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(*FuncInfo.MF),
+ MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align);
unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
@@ -1976,19 +1969,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- // If/when switches are implemented, jump tables should be handled
- // on the "if" path here.
- if (CModel == CodeModel::Large ||
- (GV->getType()->getElementType()->isFunctionTy() &&
- !GV->isStrongDefinitionForLinker()) ||
- GV->isDeclaration() || GV->hasCommonLinkage() ||
- GV->hasAvailableExternallyLinkage())
+ unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
+ if (GVFlags & PPCII::MO_NLP_FLAG) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
- else
+ } else {
// Otherwise generate the ADDItocL.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
DestReg).addReg(HighPartReg).addGlobalAddress(GV);
+ }
}
return DestReg;
@@ -2085,12 +2074,11 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
-unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
- bool UseSExt) {
+unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
+ bool UseSExt) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
- const ConstantInt *CI = cast<ConstantInt>(C);
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
@@ -2105,12 +2093,17 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT,
&PPC::GPRCRegClass);
// If the constant is in range, use a load-immediate.
- const ConstantInt *CI = cast<ConstantInt>(C);
- if (isInt<16>(CI->getSExtValue())) {
+ if (UseSExt && isInt<16>(CI->getSExtValue())) {
+ unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
+ unsigned ImmReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
+ .addImm(CI->getSExtValue());
+ return ImmReg;
+ } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
- .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() );
+ .addImm(CI->getZExtValue());
return ImmReg;
}
@@ -2138,8 +2131,8 @@ unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
return PPCMaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return PPCMaterializeGV(GV, VT);
- else if (isa<ConstantInt>(C))
- return PPCMaterializeInt(C, VT, VT != MVT::i1);
+ else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return PPCMaterializeInt(CI, VT, VT != MVT::i1);
return 0;
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 08ae717..beab844 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
/// VRRegNo - Map from a numbered VR register to its enum value.
///
-static const uint16_t VRRegNo[] = {
+static const MCPhysReg VRRegNo[] = {
PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
@@ -270,7 +270,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// epilog blocks.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().isReturn()) {
+ if (I->isReturnBlock()) {
bool FoundIt = false;
for (MBBI = I->end(); MBBI != I->begin(); ) {
--MBBI;
@@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UsedRegMask = 0;
for (unsigned i = 0; i != 32; ++i)
- if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ if (MRI.isPhysRegModified(VRRegNo[i]))
UsedRegMask |= 1 << (31-i);
// Live in and live out values already must be in the mask, so don't bother
@@ -325,7 +326,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
UsedRegMask != 0 && BI != BE; ++BI) {
const MachineBasicBlock &MBB = *BI;
- if (MBB.empty() || !MBB.back().isReturn())
+ if (!MBB.isReturnBlock())
continue;
const MachineInstr &Ret = MBB.back();
for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
@@ -555,9 +556,67 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
}
}
+bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
+ bool UseAtEnd,
+ unsigned *ScratchRegister) const {
+ RegScavenger RS;
+ unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
+
+ if (ScratchRegister)
+ *ScratchRegister = R0;
+
+ // If MBB is an entry or exit block, use R0 as the scratch register
+ if ((UseAtEnd && MBB->isReturnBlock()) ||
+ (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
+ return true;
+
+ RS.enterBasicBlock(MBB);
+
+ if (UseAtEnd && !MBB->empty()) {
+    // The scratch register will be used at the end of the block, so we must
+    // consider all registers used within the block.
+
+ MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
+ // If no terminator, back iterator up to previous instruction.
+ if (MBBI == MBB->end())
+ MBBI = std::prev(MBBI);
+
+ if (MBBI != MBB->begin())
+ RS.forward(MBBI);
+ }
+
+ if (!RS.isRegUsed(R0))
+ return true;
+
+ unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass
+ : &PPC::GPRCRegClass);
+
+  // Make sure the register scavenger was able to find an available register.
+  // If not, use R0 but return false to indicate that no register was available
+  // and R0 must be used (as recommended by the ABI).
+ if (Reg == 0)
+ return false;
+
+ if (ScratchRegister)
+ *ScratchRegister = Reg;
+
+ return true;
+}
+
+bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+
+ return findScratchRegister(TmpMBB, false, nullptr);
+}
+
+bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
+ MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
+
+ return findScratchRegister(TmpMBB, true, nullptr);
+}
+
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
@@ -589,7 +648,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // Move MBBI back to the beginning of the function.
+ // Move MBBI back to the beginning of the prologue block.
MBBI = MBB.begin();
// Work out frame sizes.
@@ -613,7 +672,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
- unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
// ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
@@ -642,6 +701,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
+ findScratchRegister(&MBB, false, &ScratchReg);
+ assert(ScratchReg && "No scratch register!");
+
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
@@ -916,27 +978,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI != MBB.end() && "Returning block has no terminator");
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+ DebugLoc dl;
+
+ if (MBBI != MBB.end())
+ dl = MBBI->getDebugLoc();
+
const PPCInstrInfo &TII =
*static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
const PPCRegisterInfo *RegInfo =
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
- unsigned RetOpcode = MBBI->getOpcode();
- DebugLoc dl;
-
- assert((RetOpcode == PPC::BLR ||
- RetOpcode == PPC::BLR8 ||
- RetOpcode == PPC::TCRETURNri ||
- RetOpcode == PPC::TCRETURNdi ||
- RetOpcode == PPC::TCRETURNai ||
- RetOpcode == PPC::TCRETURNri8 ||
- RetOpcode == PPC::TCRETURNdi8 ||
- RetOpcode == PPC::TCRETURNai8) &&
- "Can only insert epilog into returning blocks");
-
// Get alignment info so we know how to restore the SP.
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -959,7 +1012,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
unsigned BPReg = RegInfo->getBaseRegister(MF);
unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned ScratchReg = 0;
unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
: PPC::MTLR );
@@ -973,10 +1026,14 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::ADDI );
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
-
+
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
+
+ findScratchRegister(&MBB, true, &ScratchReg);
+ assert(ScratchReg && "No scratch register!");
+
if (HasFP) {
if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
@@ -1008,25 +1065,30 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
PBPOffset = FFI->getObjectOffset(PBPIndex);
}
- bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
- RetOpcode == PPC::TCRETURNdi ||
- RetOpcode == PPC::TCRETURNai ||
- RetOpcode == PPC::TCRETURNri8 ||
- RetOpcode == PPC::TCRETURNdi8 ||
- RetOpcode == PPC::TCRETURNai8;
-
- if (UsesTCRet) {
- int MaxTCRetDelta = FI->getTailCallSPDelta();
- MachineOperand &StackAdjust = MBBI->getOperand(1);
- assert(StackAdjust.isImm() && "Expecting immediate value.");
- // Adjust stack pointer.
- int StackAdj = StackAdjust.getImm();
- int Delta = StackAdj - MaxTCRetDelta;
- assert((Delta >= 0) && "Delta must be positive");
- if (MaxTCRetDelta>0)
- FrameSize += (StackAdj +Delta);
- else
- FrameSize += StackAdj;
+ bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
+
+ if (IsReturnBlock) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be positive");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
}
// Frames of 32KB & larger require special handling because they cannot be
@@ -1066,7 +1128,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addImm(0)
.addReg(SPReg);
}
-
}
if (MustSaveLR)
@@ -1109,52 +1170,55 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
- MF.getFunction()->getCallingConv() == CallingConv::Fast) {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- unsigned CallerAllocatedAmt = FI->getMinReservedArea();
-
- if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
- BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
- .addReg(SPReg).addImm(CallerAllocatedAmt);
- } else {
- BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ if (IsReturnBlock) {
+ unsigned RetOpcode = MBBI->getOpcode();
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(CallerAllocatedAmt >> 16);
- BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
.addReg(ScratchReg, RegState::Kill)
.addImm(CallerAllocatedAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, AddInst)
+ BuildMI(MBB, MBBI, dl, AddInst)
.addReg(SPReg)
.addReg(FPReg)
.addReg(ScratchReg);
- }
- } else if (RetOpcode == PPC::TCRETURNdi) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
- } else if (RetOpcode == PPC::TCRETURNri) {
- MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
- } else if (RetOpcode == PPC::TCRETURNai) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
- } else if (RetOpcode == PPC::TCRETURNdi8) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
- addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
- } else if (RetOpcode == PPC::TCRETURNri8) {
- MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
- } else if (RetOpcode == PPC::TCRETURNai8) {
- MBBI = MBB.getLastNonDebugInstr();
- MachineOperand &JumpTarget = MBBI->getOperand(0);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
}
}
@@ -1200,8 +1264,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Reserve stack space for the PIC Base register (R30).
// Only used in SVR4 32-bit.
if (FI->usesPICBase()) {
- int PBPSI = FI->getPICBasePointerSaveIndex();
- PBPSI = MFI->CreateFixedObject(4, -8, true);
+ int PBPSI = MFI->CreateFixedObject(4, -8, true);
FI->setPICBasePointerSaveIndex(PBPSI);
}
@@ -1710,3 +1773,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+
+bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
+ MF.getSubtarget<PPCSubtarget>().isPPC64());
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index d6a389b..bbe1329 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -29,6 +29,30 @@ class PPCFrameLowering: public TargetFrameLowering {
const unsigned LinkageSize;
const unsigned BasePointerSaveOffset;
+ /**
+ * \brief Find a register that can be used in function prologue and epilogue
+ *
+   * Find a register that can be used as the scratch register in function
+ * prologue and epilogue to save various registers (Link Register, Base
+ * Pointer, etc.). Prefer R0, if it is available. If it is not available,
+ * then choose a different register.
+ *
+ * This method will return true if an available register was found (including
+ * R0). If no available registers are found, the method returns false and sets
+ * ScratchRegister to R0, as per the recommendation in the ABI.
+ *
+ * \param[in] MBB The machine basic block to find an available register for
+ * \param[in] UseAtEnd Specify whether the scratch register will be used at
+ * the end of the basic block (i.e., will the scratch
+ * register kill a register defined in the basic block)
+ * \param[out] ScratchRegister The scratch register to use
+   * \return true if a scratch register was found; false if a scratch register
+ * was not found and R0 is being used as the default.
+ */
+ bool findScratchRegister(MachineBasicBlock *MBB,
+ bool UseAtEnd,
+ unsigned *ScratchRegister) const;
+
public:
PPCFrameLowering(const PPCSubtarget &STI);
@@ -92,6 +116,13 @@ public:
const SpillSlot *
getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
+ /// Methods used by shrink wrapping to determine if MBB can be used for the
+ /// function prologue/epilogue.
+ bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
+ bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9322268..1eaa811 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,6 +16,8 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,6 +54,11 @@ static cl::opt<bool> BPermRewriterNoMasking(
"bit permutations"),
cl::Hidden);
+static cl::opt<bool> EnableBranchHint(
+ "ppc-use-branch-hint", cl::init(true),
+ cl::desc("Enable static hinting of branches on ppc"),
+ cl::Hidden);
+
namespace llvm {
void initializePPCDAGToDAGISelPass(PassRegistry&);
}
@@ -286,7 +293,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().isReturn()) {
+ if (BB->isReturnBlock()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
@@ -393,6 +400,55 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) {
return isInt32Immediate(N.getNode(), Imm);
}
+static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+ const SDValue &DestMBB) {
+ assert(isa<BasicBlockSDNode>(DestMBB));
+
+ if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
+
+ const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+ const TerminatorInst *BBTerm = BB->getTerminator();
+
+ if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
+
+ const BasicBlock *TBB = BBTerm->getSuccessor(0);
+ const BasicBlock *FBB = BBTerm->getSuccessor(1);
+
+ auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
+ auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
+
+  // We only want to handle cases that are easy to predict statically, e.g. a
+  // C++ throw statement, which is very likely not taken, or a call to a
+  // function that never returns, e.g. stdlib exit(). So we set Threshold to
+  // filter out the unwanted cases.
+ //
+  // Below is the LLVM branch weight table; we only want to handle cases 1 and 2.
+ //
+ // Case Taken:Nontaken Example
+ // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
+ // 2. Invoke-terminating 1:1048575
+ // 3. Coldblock 4:64 __builtin_expect
+ // 4. Loop Branch 124:4 For loop
+ // 5. PH/ZH/FPH 20:12
+ const uint32_t Threshold = 10000;
+
+ if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
+ return PPC::BR_NO_HINT;
+
+ DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
+ << BB->getName() << "'\n"
+ << " -> " << TBB->getName() << ": " << TProb << "\n"
+ << " -> " << FBB->getName() << ": " << FProb << "\n");
+
+ const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
+
+ // If the Dest BasicBlock is the False-BasicBlock (FBB), swap the branch
+ // probabilities, because we want 'TProb' to stand for the branch probability
+ // to the Dest BasicBlock.
+ if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
+ std::swap(TProb, FProb);
+
+ return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
+}
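The Threshold test above is terse: max(TProb, FProb) / Threshold < min(TProb, FProb) rejects any branch whose edge probabilities are less lopsided than roughly Threshold:1, so only the near-certain cases (1 and 2 in the table) get a static hint. A minimal standalone sketch of the same test using plain integer weights rather than LLVM's BranchProbability type; the helper name is invented for illustration:

#include <algorithm>
#include <cstdint>

// Returns true when the taken/not-taken weights are skewed by at least
// roughly Threshold:1, i.e. when getBranchHint would emit a static hint.
static bool skewedEnoughForHint(uint64_t TakenWeight, uint64_t NotTakenWeight,
                                uint64_t Threshold = 10000) {
  uint64_t Hi = std::max(TakenWeight, NotTakenWeight);
  uint64_t Lo = std::min(TakenWeight, NotTakenWeight);
  // Mirrors the rejection test above: if Hi / Threshold < Lo, the branch is
  // not lopsided enough and no hint is emitted.
  return Hi / Threshold >= Lo;
}

// skewedEnoughForHint(1048575, 1) -> true  (the "unreachable" weights)
// skewedEnoughForHint(4, 64)      -> false (__builtin_expect's cold-block weights)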
// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
@@ -564,7 +620,6 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
// Handle first 32 bits.
unsigned Lo = Imm & 0xFFFF;
- unsigned Hi = (Imm >> 16) & 0xFFFF;
// Simple value.
if (isInt<16>(Imm)) {
@@ -586,9 +641,9 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
++Result;
// Add in the last bits as required.
- if ((Hi = (Remainder >> 16) & 0xFFFF))
+ if ((Remainder >> 16) & 0xFFFF)
++Result;
- if ((Lo = Remainder & 0xFFFF))
+ if (Remainder & 0xFFFF)
++Result;
return Result;
@@ -1028,7 +1083,7 @@ class BitPermutationSelector {
BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
- DEBUG(dbgs() << "\tcombining final bit group with inital one\n");
+ DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
BitGroups.erase(BitGroups.begin());
}
@@ -1557,10 +1612,7 @@ class BitPermutationSelector {
return false;
}
- if (VRI.RLAmt != EffRLAmt)
- return false;
-
- return true;
+ return VRI.RLAmt == EffRLAmt;
};
for (auto &BG : BitGroups) {
@@ -2781,7 +2833,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
-
+
SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
unsigned DM[2];
@@ -2798,7 +2850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
SDValue Base, Offset;
- if (LD->isUnindexed() &&
+ if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
(LD->getMemoryVT() == MVT::f64 ||
LD->getMemoryVT() == MVT::i64) &&
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
@@ -2841,8 +2893,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// Op #3 is the Dest MBB
// Op #4 is the Flag.
// Prevent PPC::PRED_* from being selected into LI.
- SDValue Pred =
- getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(), dl);
+ unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
+
+ SDValue Pred = getI32Imm(PCC, dl);
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
@@ -2871,6 +2926,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
BitComp, N->getOperand(4), N->getOperand(0));
}
+ if (EnableBranchHint)
+ PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
+
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
N->getOperand(4), N->getOperand(0) };
@@ -2903,9 +2961,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
break;
// The first source operand is a TargetGlobalAddress or a TargetJumpTable.
- // If it is an externally defined symbol, a symbol with common linkage,
- // a non-local function address, or a jump table address, or if we are
- // generating code for large code model, we generate:
+ // If it must be toc-referenced according to PPCSubTarget, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -2920,13 +2976,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
MVT::i64, GA, SDValue(Tmp, 0)));
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
- const GlobalValue *GValue = G->getGlobal();
- if ((GValue->getType()->getElementType()->isFunctionTy() &&
- !GValue->isStrongDefinitionForLinker()) ||
- GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage())
+ const GlobalValue *GV = G->getGlobal();
+ unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
+ if (GVFlags & PPCII::MO_NLP_FLAG) {
return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl,
MVT::i64, GA, SDValue(Tmp, 0)));
+ }
}
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
@@ -3110,7 +3165,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
if (!CurDAG->MaskedValueIsZero(Op0,
APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
return false;
-
+
LHS = Op0.getOperand(0);
RHS = Op0.getOperand(1);
return true;
@@ -3305,7 +3360,7 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
if (N->use_empty())
continue;
@@ -3989,7 +4044,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
bool MadeChange = false;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
@@ -4145,7 +4200,7 @@ void PPCDAGToDAGISel::PeepholePPC64() {
++Position;
while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = --Position;
+ SDNode *N = &*--Position;
// Skip dead nodes and any non-machine opcodes.
if (N->use_empty() || !N->isMachineOpcode())
continue;
@@ -4184,16 +4239,24 @@ void PPCDAGToDAGISel::PeepholePPC64() {
break;
}
- // If this is a load or store with a zero offset, we may be able to
- // fold an add-immediate into the memory operation.
- if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
- N->getConstantOperandVal(FirstOp) != 0)
+ // If this is a load or store with a zero offset, or with an offset that
+ // stays within the symbol's alignment, we may be able to fold an
+ // add-immediate into the memory operation. The alignment check is done
+ // below, as it cannot happen until we examine the operands of N.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
continue;
SDValue Base = N->getOperand(FirstOp + 1);
if (!Base.isMachineOpcode())
continue;
+ // On targets with fusion, we don't want this to fire and remove a fusion
+ // opportunity, unless a) it results in another fusion opportunity or
+ // b) optimizing for size.
+ if (PPCSubTarget->hasFusion() &&
+ (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
+ continue;
+
unsigned Flags = 0;
bool ReplaceFlags = true;
@@ -4237,6 +4300,17 @@ void PPCDAGToDAGISel::PeepholePPC64() {
break;
}
+ SDValue ImmOpnd = Base.getOperand(1);
+ int MaxDisplacement = 0;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ const GlobalValue *GV = GA->getGlobal();
+ MaxDisplacement = GV->getAlignment() - 1;
+ }
+
+ int Offset = N->getConstantOperandVal(FirstOp);
+ if (Offset < 0 || Offset > MaxDisplacement)
+ continue;
+
// We found an opportunity. Reverse the operands from the add
// immediate and substitute them into the load or store. If
// needed, update the target flags for the immediate operand to
@@ -4247,8 +4321,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
DEBUG(N->dump(CurDAG));
DEBUG(dbgs() << "\n");
- SDValue ImmOpnd = Base.getOperand(1);
-
// If the relocation information isn't already present on the
// immediate operand, add it now.
if (ReplaceFlags) {
@@ -4259,17 +4331,17 @@ void PPCDAGToDAGISel::PeepholePPC64() {
// is insufficient for the instruction encoding.
if (GV->getAlignment() < 4 &&
(StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
- StorageOpcode == PPC::LWA)) {
+ StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
continue;
}
- ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
const Constant *C = CP->getConstVal();
ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
CP->getAlignment(),
- 0, Flags);
+ Offset, Flags);
}
}
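The displacement handling above widens the old zero-offset-only rule: a constant offset can now be folded into the memory operation whenever it stays inside the referenced global's alignment padding (0 through alignment - 1), with the later alignment check still rejecting DS-form candidates (LD, STD, LWA, or a folded offset that is not a multiple of 4) when the symbol is less than 4-byte aligned. A minimal sketch of the displacement window alone; the function name is invented and this is not the actual LLVM code:

#include <cstdint>

// A constant offset may be folded only if it lands within the referenced
// global's alignment padding. A non-global base gets no padding, so only
// Offset == 0 survives, which preserves the old zero-offset-only behaviour.
static bool offsetWithinAlignmentPadding(int64_t Offset, uint64_t GVAlign) {
  int64_t MaxDisplacement = GVAlign ? static_cast<int64_t>(GVAlign) - 1 : 0;
  return Offset >= 0 && Offset <= MaxDisplacement;
}

// offsetWithinAlignmentPadding(0, 0)   -> true  (old behaviour)
// offsetWithinAlignmentPadding(8, 16)  -> true
// offsetWithinAlignmentPadding(16, 16) -> false (past the padding)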
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1b8f8fb..af9ad07 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -42,10 +42,6 @@
using namespace llvm;
-// FIXME: Remove this once soft-float is supported.
-static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
-cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);
-
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -72,8 +68,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Set up the register classes.
addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
- addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
- addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ if (!Subtarget.useSoftFloat()) {
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+ }
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD
for (MVT VT : MVT::integer_valuetypes()) {
@@ -107,8 +105,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
- isPPC64 ? MVT::i64 : MVT::i32);
+ AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
} else {
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
@@ -257,10 +255,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::BITCAST, MVT::f32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i64, Expand);
- setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+ if (Subtarget.hasDirectMove()) {
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+ } else {
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+ }
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
@@ -329,6 +334,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
+ setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -403,9 +410,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// will selectively turn on ones that can be effectively codegen'd.
for (MVT VT : MVT::vector_valuetypes()) {
// add/sub are legal for all supported vector VT's.
- setOperationAction(ISD::ADD , VT, Legal);
- setOperationAction(ISD::SUB , VT, Legal);
-
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+
// Vector instructions introduced in P8
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
setOperationAction(ISD::CTPOP, VT, Legal);
@@ -477,6 +484,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
for (MVT InnerVT : MVT::vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
@@ -519,12 +528,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
-
- if (Subtarget.hasP8Altivec())
+ if (Subtarget.hasP8Altivec())
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
else
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
-
+
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
@@ -545,6 +553,21 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+ if (Subtarget.hasP8Vector()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
+ }
+ if (Subtarget.hasDirectMove()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ }
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
@@ -813,15 +836,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::SRA_I128, nullptr);
}
- if (isPPC64) {
- setStackPointerRegisterToSaveRestore(PPC::X1);
- setExceptionPointerRegister(PPC::X3);
- setExceptionSelectorRegister(PPC::X4);
- } else {
- setStackPointerRegisterToSaveRestore(PPC::R1);
- setExceptionPointerRegister(PPC::R3);
- setExceptionSelectorRegister(PPC::R4);
- }
+ setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
@@ -942,9 +957,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
- getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == MaxMaxAlign)
@@ -969,6 +984,10 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
return Align;
}
+bool PPCTargetLowering::useSoftFloat() const {
+ return Subtarget.useSoftFloat();
+}
+
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((PPCISD::NodeType)Opcode) {
case PPCISD::FIRST_NUMBER: break;
@@ -992,6 +1011,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
case PPCISD::SRL: return "PPCISD::SRL";
case PPCISD::SRA: return "PPCISD::SRA";
@@ -1236,7 +1256,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
-/// The ShuffleKind distinguishes between big-endian merges with two
+/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
@@ -1261,7 +1281,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
-/// The ShuffleKind distinguishes between big-endian merges with two
+/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
@@ -1353,7 +1373,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
* - 2 = little-endian merge with two different inputs (inputs are swapped for
* little-endian merges).
* \param[in] DAG The current SelectionDAG
- * \return true iff this shuffle mask
+ * \return true iff this shuffle mask
*/
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG) {
@@ -1380,7 +1400,7 @@ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-/// The ShuffleKind distinguishes between big-endian operations with two
+/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
@@ -1513,8 +1533,8 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
for (unsigned i = 0; i != Multiple-1; ++i) {
if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
- LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
- LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ LeadingZero &= isNullConstant(UniquedVals[i]);
+ LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
}
// Finally, check the least significant entry.
if (LeadingZero) {
@@ -1629,7 +1649,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
-
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
@@ -1998,10 +2017,10 @@ static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
SDValue Ops[] = { GA, Reg };
- return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
- DAG.getVTList(VT, MVT::Other), Ops, VT,
- MachinePointerInfo::getGOT(), 0, false, true,
- false, 0);
+ return DAG.getMemIntrinsicNode(
+ PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true,
+ false, 0);
}
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
@@ -2092,6 +2111,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
// large models could be added if users need it, at the cost of
// additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2480,7 +2502,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
// */
// } va_list[1];
-
SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
@@ -2536,7 +2557,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-// Function whose sole purpose is to kill compiler warnings
+// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
@@ -2933,8 +2954,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
PPC::F8
};
unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
- if (DisablePPCFloatInVariadic)
- NumFPArgRegs = 0;
+
+ if (Subtarget.useSoftFloat())
+ NumFPArgRegs = 0;
FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
@@ -3177,15 +3199,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
- MachinePointerInfo(FuncArg),
- ObjType, false, false, 0);
+ MachinePointerInfo(&*FuncArg), ObjType,
+ false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
// store the whole register as-is to the parameter save area
// slot.
- Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg),
- false, false, 0);
+ Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(&*FuncArg), false, false, 0);
}
MemOps.push_back(Store);
@@ -3212,9 +3234,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Off = DAG.getConstant(j, dl, PtrVT);
Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(&*FuncArg, j), false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
}
@@ -3592,7 +3614,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg),
+ MachinePointerInfo(&*FuncArg),
ObjType, false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
@@ -3615,9 +3637,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(&*FuncArg, j), false, false, 0);
MemOps.push_back(Store);
++GPR_idx;
ArgOffset += PtrByteSize;
@@ -3880,7 +3902,6 @@ struct TailCallArgumentInfo {
TailCallArgumentInfo() : FrameIdx(0) {}
};
-
}
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
@@ -3895,9 +3916,10 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue FIN = TailCallArgs[i].FrameIdxOp;
int FI = TailCallArgs[i].FrameIdx;
// Store relative to framepointer.
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ MemOpChains.push_back(DAG.getStore(
+ Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, 0));
}
}
@@ -3922,9 +3944,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
- Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
- MachinePointerInfo::getFixedStack(NewRetAddr),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr),
+ false, false, 0);
// When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
// slot as the FP is never overwritten.
@@ -3933,9 +3956,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
- Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
- MachinePointerInfo::getFixedStack(NewFPIdx),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx),
+ false, false, 0);
}
}
return Chain;
@@ -4812,8 +4836,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
continue;
break;
case MVT::v4f32:
- // When using QPX, this is handled like a FP register, otherwise, it
- // is an Altivec register.
+ // When using QPX, this is handled like a FP register, otherwise, it
+ // is an Altivec register.
if (Subtarget.hasQPX()) {
if (++NumFPRsUsed <= NumFPRs)
continue;
@@ -5318,9 +5342,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
- Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
- MachinePointerInfo::getStack(TOCSaveOffset),
- false, false, 0);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, AddPtr,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset),
+ false, false, 0);
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
@@ -5341,9 +5366,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
FPOp, true, TailCallArguments);
- return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
- hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
- Callee, SPDiff, NumBytes, Ins, InVals, CS);
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest,
+ DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
+ SPDiff, NumBytes, Ins, InVals, CS);
}
SDValue
@@ -5798,6 +5823,22 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
+SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(
+ SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const {
+ SDLoc dl(Op);
+
+ // Get the correct type for integers.
+ EVT IntVT = Op.getValueType();
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNAREAOFFSET node.
+ SDValue Ops[2] = {Chain, FPSIdx};
+ SDVTList VTs = DAG.getVTList(IntVT);
+ return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
+}
+
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
// When we pop the dynamic allocation we need to restore the SP link.
@@ -5828,10 +5869,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
false, false, 0);
}
-
-
-SDValue
-PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
bool isPPC64 = Subtarget.isPPC64();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
@@ -5983,6 +6021,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
if (!DAG.getTarget().Options.NoInfsFPMath ||
!DAG.getTarget().Options.NoNaNsFPMath)
return Op;
+ // TODO: Propagate flags from the select rather than global settings.
+ SDNodeFlags Flags;
+ Flags.setNoInfs(true);
+ Flags.setNoNaNs(true);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
@@ -6033,7 +6075,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
case ISD::SETNE:
std::swap(TV, FV);
case ISD::SETEQ:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -6043,25 +6085,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
case ISD::SETULT:
case ISD::SETLT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOGE:
case ISD::SETGE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
case ISD::SETUGT:
case ISD::SETGT:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
case ISD::SETOLE:
case ISD::SETLE:
- Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags);
if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
@@ -6101,7 +6143,8 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
(Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
- MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
// Emit a store to the stack slot.
SDValue Chain;
@@ -6291,11 +6334,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
// This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
-
+
SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
- FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
- FPHalfs, FPHalfs, FPHalfs, FPHalfs);
-
+ FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs,
+ FPHalfs, FPHalfs);
+
Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
if (Op.getValueType() != MVT::v4f64)
@@ -6421,17 +6464,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
RLI.Chain = Store;
- RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = 4;
MachineMemOperand *MMO =
@@ -6472,16 +6516,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
"Expected an i32 store");
RLI.Ptr = FIdx;
RLI.Chain = Store;
- RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
+ RLI.MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
RLI.Alignment = 4;
}
@@ -6506,14 +6552,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
Op.getOperand(0));
// STD the extended value into the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, 0);
+ SDValue Store = DAG.getStore(
+ DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, 0);
// Load the value as a double.
- Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
- MachinePointerInfo::getFixedStack(FrameIdx),
- false, false, false, 0);
+ Ld = DAG.getLoad(
+ MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx),
+ false, false, false, 0);
}
// FCFID it and return it.
@@ -6735,7 +6783,6 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}
-
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
@@ -6768,7 +6815,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// to a zero vector to get the boolean result.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -6794,8 +6842,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
for (unsigned i = 0; i < 4; ++i) {
if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
- else if (cast<ConstantSDNode>(BVN->getOperand(i))->
- getConstantIntValue()->isZero())
+ else if (isNullConstant(BVN->getOperand(i)))
continue;
else
CV[i] = One;
@@ -6814,9 +6861,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
ValueVTs.push_back(MVT::Other); // chain
SDVTList VTs = DAG.getVTList(ValueVTs);
- return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
- dl, VTs, Ops, MVT::v4f32,
- MachinePointerInfo::getConstantPool());
+ return DAG.getMemIntrinsicNode(
+ PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
SmallVector<SDValue, 4> Stores;
@@ -6915,7 +6962,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (SextVal >= -16 && SextVal <= 15)
return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
-
// Two instruction sequences.
// If this value is in the range [-32,30] and is even, use:
@@ -7304,11 +7350,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
V1, V2, VPermMask);
}
-/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
-/// altivec comparison. If it is, return true and fill in Opc/isDot with
+/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
+/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
-static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
- bool &isDot, const PPCSubtarget &Subtarget) {
+static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
@@ -7321,12 +7367,11 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequd_p:
+ case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 199;
- isDot = 1;
- }
- else
+ CompareOpc = 199;
+ isDot = 1;
+ } else
return false;
break;
@@ -7335,28 +7380,48 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsd_p:
+ case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 967;
- isDot = 1;
- }
- else
+ CompareOpc = 967;
+ isDot = 1;
+ } else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtud_p:
+ case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 711;
- isDot = 1;
+ CompareOpc = 711;
+ isDot = 1;
+ } else
+ return false;
+
+ break;
+ // VSX predicate comparisons use the same infrastructure
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p:
+ case Intrinsic::ppc_vsx_xvcmpgedp_p:
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p:
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p:
+ case Intrinsic::ppc_vsx_xvcmpgesp_p:
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p:
+ if (Subtarget.hasVSX()) {
+ switch (IntrinsicID) {
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
+ case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
+ case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
+ }
+ isDot = 1;
}
- else
+ else
return false;
break;
-
+
// Normal Comparisons.
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
@@ -7365,10 +7430,9 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpequd:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 199;
- isDot = 0;
- }
- else
+ CompareOpc = 199;
+ isDot = 0;
+ } else
return false;
break;
@@ -7377,24 +7441,22 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsd:
+ case Intrinsic::ppc_altivec_vcmpgtsd:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 967;
- isDot = 0;
- }
- else
+ CompareOpc = 967;
+ isDot = 0;
+ } else
return false;
break;
case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtud:
+ case Intrinsic::ppc_altivec_vcmpgtud:
if (Subtarget.hasP8Altivec()) {
- CompareOpc = 711;
- isDot = 0;
- }
- else
+ CompareOpc = 711;
+ isDot = 0;
+ } else
return false;
break;
@@ -7411,7 +7473,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDLoc dl(Op);
int CompareOpc;
bool isDot;
- if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
+ if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
return SDValue(); // Don't custom lower most intrinsics.
// If this is a non-dot comparison, make the VCMP node and we are done.
@@ -7536,7 +7598,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
FPHalfs, FPHalfs, FPHalfs, FPHalfs);
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
// Now convert to an integer and store.
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
@@ -7545,7 +7607,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -7752,7 +7815,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
FPHalfs, FPHalfs, FPHalfs, FPHalfs);
- Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
+ Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
// Now convert to an integer and store.
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
@@ -7761,7 +7824,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
- MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
+ MachinePointerInfo PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -7798,11 +7862,10 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
- Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
- SN->getPointerInfo().getWithOffset(i),
- MVT::i8 /* memory type */,
- SN->isNonTemporal(), SN->isVolatile(),
- 1 /* alignment */, SN->getAAInfo()));
+ Stores.push_back(DAG.getTruncStore(
+ StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
+ MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(),
+ 1 /* alignment */, SN->getAAInfo()));
}
StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
@@ -7906,6 +7969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
+ case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
@@ -7971,7 +8035,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
N->getValueType(0));
SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
- N->getOperand(1));
+ N->getOperand(1));
Results.push_back(NewInt);
Results.push_back(NewInt.getValue(1));
@@ -8020,7 +8084,6 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
@@ -8089,8 +8152,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
@@ -8160,8 +8222,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction *F = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
unsigned dest = MI->getOperand(0).getReg();
unsigned ptrA = MI->getOperand(1).getReg();
@@ -8283,8 +8344,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
+ MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -8384,8 +8444,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
.addMBB(mainMBB);
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
- thisMBB->addSuccessor(mainMBB, /* weight */ 0);
- thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+ thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
+ thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
// mainMBB:
// mainDstReg = 0
@@ -8562,8 +8622,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// To "insert" these instructions we actually have to insert their
// control-flow patterns.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
MachineFunction *F = BB->getParent();
@@ -8675,7 +8734,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// mfspr Rx,TBU # load from TBU
// mfspr Ry,TB # load from TB
// mfspr Rz,TBU # load from TBU
- // cmpw crX,Rx,Rz # check if ‘old’=’new’
+ // cmpw crX,Rx,Rz # check if 'old'='new'
// bne readLoop # branch if they're not equal
// ...
@@ -9137,7 +9196,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
return SDValue();
}
-bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
// Note: This functionality is used only when unsafe-fp-math is enabled, and
// on cores with reciprocal estimates (which are used when unsafe-fp-math is
// enabled for division), this functionality is redundant with the default
@@ -9150,12 +9209,26 @@ bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
// one FP pipeline) for three or more FDIVs (for generic OOO cores).
switch (Subtarget.getDarwinDirective()) {
default:
- return NumUsers > 2;
+ return 3;
case PPC::DIR_440:
case PPC::DIR_A2:
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
- return NumUsers > 1;
+ return 2;
+ }
+}
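The hook above now returns a threshold instead of answering a yes/no question for a given user count: rewriting several divisions by one value into a single reciprocal plus a series of multiplies should only happen once at least that many divisions share the divisor (presumably the generic combiner compares its count of such divisions against this value). A minimal sketch of the policy as a standalone predicate, with invented names:

#include <cstddef>

// Returns true when enough FP divisions share one divisor to justify
// rewriting them as a single reciprocal plus multiplies. MinUses mirrors
// the override above: 2 for the 440/A2/E500mc/E5500 directives, 3 otherwise.
static bool worthCombiningRepeatedFPDivisors(std::size_t NumDivisionsByX,
                                             unsigned MinUses) {
  return NumDivisionsByX >= MinUses;
}

// worthCombiningRepeatedFPDivisors(3, 3) -> true   (generic cores)
// worthCombiningRepeatedFPDivisors(2, 3) -> false
// worthCombiningRepeatedFPDivisors(2, 2) -> true   (440, A2, E500mc, E5500)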
+
+// isConsecutiveLSLoc needs to work even if all adds have not yet been
+// collapsed, and so we need to look through chains of them.
+static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
+ int64_t& Offset, SelectionDAG &DAG) {
+ if (DAG.isBaseWithConstantOffset(Loc)) {
+ Base = Loc.getOperand(0);
+ Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
+
+ // The base might itself be a base plus an offset, and if so, accumulate
+ // that as well.
+ getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
}
}
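getBaseWithConstantOffset peels any chain of (base + constant) nodes and accumulates the constants, so isConsecutiveLSLoc can still match two addresses whose adds have not yet been collapsed into a single offset. A minimal standalone sketch of the same recursion over a toy node type (the struct and names are inventions for illustration, not LLVM API):

#include <cstdint>

// Toy stand-in for an address SDValue: either a plain base, or Inner + Const.
struct ToyAddr {
  const ToyAddr *Inner = nullptr; // non-null when this node is Inner + Const
  int64_t Const = 0;
};

// Peel (X + C) layers, accumulating C, until the underlying base is reached.
static void peelConstantOffsets(const ToyAddr &Loc, const ToyAddr *&Base,
                                int64_t &Offset) {
  if (Loc.Inner) {
    Base = Loc.Inner;
    Offset += Loc.Const;
    peelConstantOffsets(*Loc.Inner, Base, Offset); // look through nested adds
  }
}

// Example: for ((B + 8) + 4) the walk ends with Base == &B and Offset == 12,
// so it compares equal to an already-collapsed (B + 12).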
@@ -9178,16 +9251,18 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- // Handle X+C
- if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
- cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ SDValue Base1 = Loc, Base2 = BaseLoc;
+ int64_t Offset1 = 0, Offset2 = 0;
+ getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
+ getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
+ if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const GlobalValue *GV1 = nullptr;
const GlobalValue *GV2 = nullptr;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
+ Offset1 = 0;
+ Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
if (isGA1 && isGA2 && GV1 == GV2)
@@ -9343,7 +9418,7 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
IE = LoadRoots.end(); I != IE; ++I) {
Queue.push_back(*I);
-
+
while (!Queue.empty()) {
SDNode *LoadRoot = Queue.pop_back_val();
if (!Visited.insert(LoadRoot).second)
@@ -9470,7 +9545,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
}
// Visit all inputs, collect all binary operations (and, or, xor and
- // select) that are all fed by extensions.
+ // select) that are all fed by extensions.
while (!BinOps.empty()) {
SDValue BinOp = BinOps.back();
BinOps.pop_back();
@@ -9492,7 +9567,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
isa<ConstantSDNode>(BinOp.getOperand(i))) {
- Inputs.push_back(BinOp.getOperand(i));
+ Inputs.push_back(BinOp.getOperand(i));
} else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
BinOp.getOperand(i).getOpcode() == ISD::OR ||
BinOp.getOperand(i).getOpcode() == ISD::XOR ||
@@ -9572,7 +9647,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
if (isa<ConstantSDNode>(Inputs[i]))
continue;
else
- DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
}
// Replace all operations (these are all the same, but have a different
@@ -9682,7 +9757,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
SmallPtrSet<SDNode *, 16> Visited;
// Visit all inputs, collect all binary operations (and, or, xor and
- // select) that are all fed by truncations.
+ // select) that are all fed by truncations.
while (!BinOps.empty()) {
SDValue BinOp = BinOps.back();
BinOps.pop_back();
@@ -9701,7 +9776,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
isa<ConstantSDNode>(BinOp.getOperand(i))) {
- Inputs.push_back(BinOp.getOperand(i));
+ Inputs.push_back(BinOp.getOperand(i));
} else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
BinOp.getOperand(i).getOpcode() == ISD::OR ||
BinOp.getOperand(i).getOpcode() == ISD::XOR ||
@@ -9915,10 +9990,11 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
"Invalid extension type");
EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
SDValue ShiftCst =
- DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
- return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
- DAG.getNode(ISD::SHL, dl, N->getValueType(0),
- N->getOperand(0), ShiftCst), ShiftCst);
+ DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
+ return DAG.getNode(
+ ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
+ ShiftCst);
}
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
@@ -10102,16 +10178,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case PPCISD::SHL:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->isNullValue()) // 0 << V -> 0.
+ if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
- }
break;
case PPCISD::SRL:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->isNullValue()) // 0 >>u V -> 0.
+ if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
return N->getOperand(0);
- }
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
@@ -10122,7 +10194,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND:
return DAGCombineExtBoolTrunc(N, DCI);
case ISD::TRUNCATE:
case ISD::SETCC:
@@ -10277,7 +10349,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// original unaligned load.
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *BaseMMO =
- MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
+ MF.getMachineMemOperand(LD->getMemOperand(),
+ -(long)MemVT.getStoreSize()+1,
2*MemVT.getStoreSize()-1);
// Create the new base load.
@@ -10527,7 +10600,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::BRCOND: {
SDValue Cond = N->getOperand(1);
SDValue Target = N->getOperand(2);
-
+
if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
Intrinsic::ppc_is_decremented_ctr_nonzero) {
@@ -10558,8 +10631,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
Intrinsic::ppc_is_decremented_ctr_nonzero &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
- !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
- isZero())
+ !isNullConstant(LHS.getOperand(1)))
LHS = LHS.getOperand(0);
if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
@@ -10588,7 +10660,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
- getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
+ getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
assert(isDot && "Can't compare against a vector result!");
// If this is a comparison against something other than 0/1, then we know
@@ -10739,8 +10811,11 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
// boundary so that the entire loop fits in one instruction-cache line.
uint64_t LoopSize = 0;
for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
- for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
+ for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
LoopSize += TII->GetInstSizeInBytes(J);
+ if (LoopSize > 32)
+ break;
+ }
if (LoopSize > 16 && LoopSize <= 32)
return 5;
@@ -10868,17 +10943,19 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::QFRCRegClass);
if (VT == MVT::v4f32 && Subtarget.hasQPX())
return std::make_pair(0U, &PPC::QSRCRegClass);
- return std::make_pair(0U, &PPC::VRRCRegClass);
+ if (Subtarget.hasAltivec())
+ return std::make_pair(0U, &PPC::VRRCRegClass);
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
- } else if (Constraint == "wc") { // an individual CR bit.
+ } else if (Constraint == "wc" && Subtarget.useCRBits()) {
+ // An individual CR bit.
return std::make_pair(0U, &PPC::CRBITRCRegClass);
- } else if (Constraint == "wa" || Constraint == "wd" ||
- Constraint == "wf") {
+ } else if ((Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") && Subtarget.hasVSX()) {
return std::make_pair(0U, &PPC::VSRCRegClass);
- } else if (Constraint == "ws") {
- if (VT == MVT::f32)
+ } else if (Constraint == "ws" && Subtarget.hasVSX()) {
+ if (VT == MVT::f32 && Subtarget.hasP8Vector())
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
return std::make_pair(0U, &PPC::VSFRCRegClass);
@@ -10908,7 +10985,6 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return R;
}
-
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -11358,9 +11434,7 @@ bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
- if (BitSize == 0 || BitSize > 64)
- return false;
- return true;
+ return !(BitSize == 0 || BitSize > 64);
}
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
@@ -11477,11 +11551,21 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
return ScratchRegs;
}
+unsigned PPCTargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
+}
+
+unsigned PPCTargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
+}
+
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT , unsigned DefinedValues) const {
if (VT == MVT::v2i64)
- return false;
+ return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
if (Subtarget.hasQPX()) {
if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 6e13533..44bcb89 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -79,6 +79,11 @@ namespace llvm {
/// compute an allocation on the stack.
DYNALLOC,
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an offset from native SP to the address of the most recent
+ /// dynamic alloca.
+ DYNAREAOFFSET,
+
/// GlobalBaseReg - On Darwin, this node represents the result of the mflr
/// at function entry, used for PIC code.
GlobalBaseReg,
@@ -423,6 +428,8 @@ namespace llvm {
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ bool useSoftFloat() const override;
+
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
}
@@ -655,8 +662,17 @@ namespace llvm {
return Ty->isArrayTy();
}
- private:
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+ private:
struct ReuseLoadInfo {
SDValue Ptr;
SDValue Chain;
@@ -719,6 +735,8 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
+ SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
@@ -853,7 +871,7 @@ namespace llvm {
bool &UseOneConstNR) const override;
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index d628330..075e093 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -369,6 +369,8 @@ let Defs = [X1], Uses = [X1] in
def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
[(set i64:$result,
(PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
+def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8",
+ [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
let Defs = [LR8] in {
def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index d4e666c..c17603a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -144,6 +144,9 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
UseMI, UseIdx);
+ if (!DefMI->getParent())
+ return Latency;
+
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
unsigned Reg = DefMO.getReg();
@@ -186,6 +189,60 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+// This function does not list all associative and commutative operations, but
+// only those worth feeding through the machine combiner in an attempt to
+// reduce the critical path. Mostly, this means floating-point operations,
+// because they have high latencies (compared to other operations, such as
+// and/or, which are also associative and commutative, but have low latencies).
+bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ // FP Add:
+ case PPC::FADD:
+ case PPC::FADDS:
+ // FP Multiply:
+ case PPC::FMUL:
+ case PPC::FMULS:
+ // Altivec Add:
+ case PPC::VADDFP:
+ // VSX Add:
+ case PPC::XSADDDP:
+ case PPC::XVADDDP:
+ case PPC::XVADDSP:
+ case PPC::XSADDSP:
+ // VSX Multiply:
+ case PPC::XSMULDP:
+ case PPC::XVMULDP:
+ case PPC::XVMULSP:
+ case PPC::XSMULSP:
+ // QPX Add:
+ case PPC::QVFADD:
+ case PPC::QVFADDS:
+ case PPC::QVFADDSs:
+ // QPX Multiply:
+ case PPC::QVFMUL:
+ case PPC::QVFMULS:
+ case PPC::QVFMULSs:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool PPCInstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ // Using the machine combiner in this way is potentially expensive, so
+ // restrict to when aggressive optimizations are desired.
+ if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive)
+ return false;
+
+ // FP reassociation is only legal when we don't need strict IEEE semantics.
+ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
+ return false;
+
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+}
+
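
For context, a standalone sketch (not part of the patch) of why only long-latency operations are worth reassociating: a left-leaning chain of N adds serializes N-1 latencies, while a balanced tree needs only about log2(N) of them, and that gap only matters when the per-operation latency is large. All numbers below are illustrative.

#include <cstdio>

// Model: every FP add takes LAT cycles; all inputs are ready at cycle 0.
// A chain ((a0+a1)+a2)+... serializes every add; a balanced tree overlaps
// independent adds, so the critical path shrinks from N-1 to ~log2(N) adds.
int main() {
  const int LAT = 6;   // illustrative FP add latency
  const int N = 8;     // number of values summed

  int chain = (N - 1) * LAT;
  int tree = 0;
  for (int n = N; n > 1; n = (n + 1) / 2)
    tree += LAT;       // one tree level per iteration

  std::printf("chain critical path: %d cycles\n", chain);
  std::printf("balanced tree      : %d cycles\n", tree);
  return 0;
}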
// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -259,16 +316,16 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-// commuteInstruction - We can commute rlwimi instructions, but only if the
-// rotate amt is zero. We also have to munge the immediates a bit.
-MachineInstr *
-PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
MachineFunction &MF = *MI->getParent()->getParent();
// Normal instructions can be commuted the obvious way.
if (MI->getOpcode() != PPC::RLWIMI &&
MI->getOpcode() != PPC::RLWIMIo)
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
// Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
// 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
// changing the relative order of the mask operands might change what happens
@@ -286,6 +343,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Op0 = (Op2 & ~M) | (Op1 & M)
// Swap op1/op2
+ assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&
+ "Only the operands 1 and 2 can be swapped in RLWIMI/RLWIMIo.");
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
unsigned Reg2 = MI->getOperand(2).getReg();
@@ -353,9 +412,9 @@ bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
if (AltOpc == -1)
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
- SrcOpIdx1 = 2;
- SrcOpIdx2 = 3;
- return true;
+ // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
+ // and SrcOpIdx2.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
}
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
@@ -996,11 +1055,10 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
@@ -1109,11 +1167,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIdx),
- MFI.getObjectAlignment(FrameIdx));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FrameIdx),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
@@ -1214,7 +1271,7 @@ bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumT, unsigned ExtraT,
MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
}
@@ -1691,13 +1748,13 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
MI->setDesc(NewDesc);
if (NewDesc.ImplicitDefs)
- for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
+ for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs();
*ImpDefs; ++ImpDefs)
if (!MI->definesRegister(*ImpDefs))
MI->addOperand(*MI->getParent()->getParent(),
MachineOperand::CreateReg(*ImpDefs, true, true));
if (NewDesc.ImplicitUses)
- for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
+ for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses();
*ImpUses; ++ImpUses)
if (!MI->readsRegister(*ImpUses))
MI->addOperand(*MI->getParent()->getParent(),
@@ -1737,3 +1794,35 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
}
+std::pair<unsigned, unsigned>
+PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ const unsigned Mask = PPCII::MO_ACCESS_MASK;
+ return std::make_pair(TF & Mask, TF & ~Mask);
+}
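
The decomposition above is a plain mask split: the access-kind bits selected by MO_ACCESS_MASK form the "direct" part and everything else the "bitmask" part. A small standalone model with made-up flag values (the real encodings live in PPCII):

#include <cstdio>
#include <utility>

// Illustrative encodings only -- the real values are defined in PPCII.
enum : unsigned { ACCESS_MASK = 0xf0, LO = 0x10, HA = 0x20,
                  PIC_FLAG = 0x2, NLP_FLAG = 0x4 };

std::pair<unsigned, unsigned> decompose(unsigned TF) {
  return {TF & ACCESS_MASK, TF & ~ACCESS_MASK}; // (direct, bitmask) parts
}

int main() {
  auto P = decompose(HA | PIC_FLAG);  // e.g. an @ha access on a PIC symbol
  std::printf("direct=0x%x bitmask=0x%x\n", P.first, P.second);
  return 0;
}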
+
+ArrayRef<std::pair<unsigned, const char *>>
+PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace PPCII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_LO, "ppc-lo"},
+ {MO_HA, "ppc-ha"},
+ {MO_TPREL_LO, "ppc-tprel-lo"},
+ {MO_TPREL_HA, "ppc-tprel-ha"},
+ {MO_DTPREL_LO, "ppc-dtprel-lo"},
+ {MO_TLSLD_LO, "ppc-tlsld-lo"},
+ {MO_TOC_LO, "ppc-toc-lo"},
+ {MO_TLS, "ppc-tls"}};
+ return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+ using namespace PPCII;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_PLT_OR_STUB, "ppc-plt-or-stub"},
+ {MO_PIC_FLAG, "ppc-pic"},
+ {MO_NLP_FLAG, "ppc-nlp"},
+ {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}};
+ return makeArrayRef(TargetFlags);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 40badae..c3c3a48 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -79,6 +79,23 @@ class PPCInstrInfo : public PPCGenInstrInfo {
SmallVectorImpl<MachineInstr*> &NewMIs,
bool &NonRI, bool &SpillsVRS) const;
virtual void anchor();
+
+protected:
+ /// Commutes the operands in the given instruction.
+ /// The commutable operands are specified by their indices OpIdx1 and OpIdx2.
+ ///
+ /// Do not call this method for a non-commutable instruction or for
+ /// non-commutable pair of operand indices OpIdx1 and OpIdx2.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ ///
+ /// For example, we can commute rlwimi instructions, but only if the
+ /// rotate amt is zero. We also have to munge the immediates a bit.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const override;
+
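
For reference, a standalone check (not part of the patch) of the mask arithmetic that makes rlwimi commutable when the rotate amount is zero: swapping which source is taken under the wrap-around mask M(MB,ME) and which under ~M is equivalent to complementing the mask, and the complement of a wrap-around mask is again a wrap-around mask with MB' = ME+1 and ME' = MB-1 (mod 32).

#include <cassert>
#include <cstdint>

// rlwimi combines its two register sources under a wrap-around mask M(MB,ME):
// one operand contributes the bits selected by M, the other those selected by
// ~M. Swapping the sources is therefore the same as complementing the mask.
// The loop below verifies the MB/ME identity exhaustively.
static uint32_t maskFromMBME(unsigned MB, unsigned ME) {
  uint32_t M = 0;
  for (unsigned i = MB; ; i = (i + 1) & 31) {   // PPC big-endian bit numbering
    M |= 0x80000000u >> i;
    if (i == ME) break;
  }
  return M;
}

int main() {
  for (unsigned MB = 0; MB < 32; ++MB)
    for (unsigned ME = 0; ME < 32; ++ME) {
      if (MB == ((ME + 1) & 31)) continue;      // all-ones mask: no complement
      uint32_t M = maskFromMBME(MB, ME);
      uint32_t C = maskFromMBME((ME + 1) & 31, (MB - 1) & 31);
      assert(C == (uint32_t)~M);
    }
  return 0;
}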
public:
explicit PPCInstrInfo(PPCSubtarget &STI);
@@ -119,6 +136,19 @@ public:
return false;
}
+ bool useMachineCombiner() const override {
+ return true;
+ }
+
+ /// Return true when there is potentially a faster code sequence
+ /// for an instruction chain ending in <Root>. All potential patterns are
+ /// output in the <Pattern> array.
+ bool getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &P) const override;
+
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const override;
@@ -127,10 +157,6 @@ public:
unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const override;
- // commuteInstruction - We can commute rlwimi instructions, but only if the
- // rotate amt is zero. We also have to munge the immediates a bit.
- MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
-
bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
@@ -183,7 +209,7 @@ public:
// profitable to use the predicated branches.
bool isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override {
+ BranchProbability Probability) const override {
return true;
}
@@ -191,12 +217,10 @@ public:
unsigned NumT, unsigned ExtraT,
MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
- bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles,
- const BranchProbability
- &Probability) const override {
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ BranchProbability Probability) const override {
return true;
}
@@ -239,6 +263,15 @@ public:
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableBitmaskMachineOperandTargetFlags() const override;
};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 24fd9bd..6c4364a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -226,7 +226,9 @@ def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
// Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>;
+def SDTDynAreaOp : SDTypeProfile<1, 1, []>;
def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// PowerPC specific transformation functions and pattern fragments.
@@ -1029,6 +1031,8 @@ let Defs = [R1], Uses = [R1] in
def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
[(set i32:$result,
(PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
+def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET",
+ [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>;
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
@@ -3883,8 +3887,11 @@ def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0,
def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
-def : InstAlias<"cntlz $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
-def : InstAlias<"cntlz. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>;
+def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>;
+// The POWER variant
+def : MnemonicAlias<"cntlz", "cntlzw">;
+def : MnemonicAlias<"cntlz.", "cntlzw.">;
def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index 0a044c5..4312007 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -839,31 +839,31 @@ def : Pat<(v4f64 (scalar_to_vector f64:$A)),
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 0)),
+def : Pat<(f64 (extractelt v4f64:$S, 0)),
(EXTRACT_SUBREG $S, sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+def : Pat<(f32 (extractelt v4f32:$S, 0)),
(EXTRACT_SUBREG $S, sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 1)),
+def : Pat<(f64 (extractelt v4f64:$S, 1)),
(EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 2)),
+def : Pat<(f64 (extractelt v4f64:$S, 2)),
(EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, 3)),
+def : Pat<(f64 (extractelt v4f64:$S, 3)),
(EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+def : Pat<(f32 (extractelt v4f32:$S, 1)),
(EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+def : Pat<(f32 (extractelt v4f32:$S, 2)),
(EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+def : Pat<(f32 (extractelt v4f32:$S, 3)),
(EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>;
-def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)),
+def : Pat<(f64 (extractelt v4f64:$S, i64:$F)),
(EXTRACT_SUBREG (QVFPERM $S, $S,
(QVLPCLSXint (RLDICR $F, 2,
/* 63-2 = */ 61))),
sub_64)>;
-def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)),
+def : Pat<(f32 (extractelt v4f32:$S, i64:$F)),
(EXTRACT_SUBREG (QVFPERMs $S, $S,
(QVLPCLSXint (RLDICR $F, 2,
/* 63-2 = */ 61))),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index ce63c22..df1142c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -67,17 +67,19 @@ def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
-multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
- string asmbase, string asmstr, InstrItinClass itin,
- list<dag> pattern> {
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
+ string asmstr, InstrItinClass itin, Intrinsic Int,
+ ValueType OutTy, ValueType InTy> {
let BaseName = asmbase in {
- def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
+ def NAME : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(" ", asmstr)), itin,
- pattern>;
+ [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>;
let Defs = [CR6] in
- def o : XX3Form_Rc<opcode, xo, OOL, IOL,
+ def o : XX3Form_Rc<opcode, xo, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
!strconcat(asmbase, !strconcat(". ", asmstr)), itin,
- []>, isDOT;
+ [(set InTy:$XT,
+ (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>,
+ isDOT;
}
}
@@ -456,35 +458,23 @@ let Uses = [RM] in {
"xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
defm XVCMPEQDP : XX3Form_Rcr<60, 99,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v2i64:$XT,
- (int_ppc_vsx_xvcmpeqdp v2f64:$XA, v2f64:$XB))]>;
+ int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
defm XVCMPEQSP : XX3Form_Rcr<60, 67,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v4i32:$XT,
- (int_ppc_vsx_xvcmpeqsp v4f32:$XA, v4f32:$XB))]>;
+ int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>;
defm XVCMPGEDP : XX3Form_Rcr<60, 115,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v2i64:$XT,
- (int_ppc_vsx_xvcmpgedp v2f64:$XA, v2f64:$XB))]>;
+ int_ppc_vsx_xvcmpgedp, v2i64, v2f64>;
defm XVCMPGESP : XX3Form_Rcr<60, 83,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v4i32:$XT,
- (int_ppc_vsx_xvcmpgesp v4f32:$XA, v4f32:$XB))]>;
+ int_ppc_vsx_xvcmpgesp, v4i32, v4f32>;
defm XVCMPGTDP : XX3Form_Rcr<60, 107,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v2i64:$XT,
- (int_ppc_vsx_xvcmpgtdp v2f64:$XA, v2f64:$XB))]>;
+ int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>;
defm XVCMPGTSP : XX3Form_Rcr<60, 75,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare,
- [(set v4i32:$XT,
- (int_ppc_vsx_xvcmpgtsp v4f32:$XA, v4f32:$XB))]>;
+ int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>;
// Move Instructions
def XSABSDP : XX2Form<60, 345,
@@ -845,9 +835,9 @@ let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
}
@@ -856,9 +846,9 @@ def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
(SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
(f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
(f64 (EXTRACT_SUBREG $S, sub_64))>;
}
@@ -1206,6 +1196,23 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
AltVSXFMARel;
}
+
+ // Single Precision Conversions (FP <-> INT)
+ def XSCVSXDSP : XX2Form<60, 312,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvsxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+ def XSCVUXDSP : XX2Form<60, 296,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvuxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
+
+ // Conversions between vector and scalar single precision
+ def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
+ "xscvdpspn $XT, $XB", IIC_VecFP, []>;
+ def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
+ "xscvspdpn $XT, $XB", IIC_VecFP, []>;
+
} // AddedComplexity = 400
} // HasP8Vector
@@ -1229,3 +1236,550 @@ let Predicates = [HasDirectMove, HasVSX] in {
"mtvsrwz $XT, $rA", IIC_VecGeneral,
[(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
} // HasDirectMove, HasVSX
+
+/* Direct moves of various widths from GPRs into VSRs. Each move lines
+ the value up into element 0 (both BE and LE). Namely, entities smaller than
+ a doubleword are shifted left and moved for BE. For LE, they're moved, then
+ swapped to go into the least significant element of the VSR.
+*/
+def MovesToVSR {
+ dag BE_BYTE_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+ dag BE_HALF_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+ dag BE_WORD_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+ dag BE_DWORD_0 = (MTVSRD $A);
+
+ dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
+ dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ LE_MTVSRW, sub_64));
+ dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
+ dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ BE_DWORD_0, sub_64));
+ dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
+}
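
As a sanity check on the shift amounts used in MovesToVSR (illustrative, not from the patch): for BE, a sub-doubleword value must land in the most significant bits of the doubleword that MTVSRD deposits into the VSR, which is exactly what shifting left by 64 minus the value's width does.

#include <cassert>
#include <cstdint>

// Model the RLDICR-based positioning: place an 8/16/32-bit value into the
// high-order bits of a 64-bit doubleword, i.e. into BE element 0.
static uint64_t placeBE(uint64_t V, unsigned Bits) {
  assert(Bits < 64 && "sub-doubleword values only");
  return (V & ((1ULL << Bits) - 1)) << (64 - Bits);
}

int main() {
  assert(placeBE(0xAB, 8)        == 0xAB00000000000000ULL); // BE_BYTE_0
  assert(placeBE(0xABCD, 16)     == 0xABCD000000000000ULL); // BE_HALF_0
  assert(placeBE(0xABCD1234, 32) == 0xABCD123400000000ULL); // BE_WORD_0
  return 0;
}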
+
+/* Patterns for extracting elements out of vectors. Integer elements are
+ extracted using direct move operations. Patterns for extracting elements
+ whose indices are not available at compile time are also provided with
+ various _VARIABLE_ patterns.
+ The numbering for the DAG's is for LE, but when used on BE, the correct
+ LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
+*/
+def VectorExtractions {
+ // Doubleword extraction
+ dag LE_DWORD_0 =
+ (MFVSRD
+ (EXTRACT_SUBREG
+ (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
+ (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
+ dag LE_DWORD_1 = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+
+ // Word extraction
+ dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64));
+ dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
+ dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+ dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
+
+ // Halfword extraction
+ dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
+ dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
+ dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
+ dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
+ dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
+ dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
+ dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
+ dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
+
+ // Byte extraction
+ dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
+ dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
+ dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
+ dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
+ dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
+ dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
+ dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
+ dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
+ dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
+ dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
+ dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
+ dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
+ dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
+ dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
+ dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
+ dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));
+
+ /* Variable element number (BE and LE patterns must be specified separately)
+ This is a rather involved process.
+
+ Conceptually, this is how the move is accomplished:
+ 1. Identify which doubleword contains the element
+ 2. Shift in the VMX register so that the correct doubleword is correctly
+ lined up for the MFVSRD
+ 3. Perform the move so that the element (along with some extra stuff)
+ is in the GPR
+ 4. Right shift within the GPR so that the element is right-justified
+
+ Of course, the index is an element number which has a different meaning
+ on LE/BE so the patterns have to be specified separately.
+
+ Note: The final result will be the element right-justified with high
+ order bits being arbitrarily defined (namely, whatever was in the
+ vector register to the left of the value originally).
+ */
+
+ /* LE variable byte
+ Number 1. above:
+ - For elements 0-7, we shift left by 8 bytes since they're on the right
+ - For elements 8-15, we need not shift (shift left by zero bytes)
+ This is accomplished by inverting the bits of the index and AND-ing
+ with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
+ */
+ dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-7 (8-15 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 8 as we need to shift right by the number of bits, not bytes
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT),
+ sub_32);
+
+ /* LE variable halfword
+ Number 1. above:
+ - For elements 0-3, we shift left by 8 since they're on the right
+ - For elements 4-7, we need not shift (shift left by zero bytes)
+ Similarly to the byte pattern, we invert the bits of the index, but we
+ AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
+ Of course, the shift is still by 8 bytes, so we must multiply by 2.
+ */
+ dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-3 (4-7 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 16 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT),
+ sub_32);
+
+ /* LE variable word
+ Number 1. above:
+ - For elements 0-1, we shift left by 8 since they're on the right
+ - For elements 2-3, we need not shift
+ */
+ dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* Number 4. above:
+ - Truncate the element number to the range 0-1 (2-3 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 32 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT),
+ sub_32);
+
+ /* LE variable doubleword
+ Number 1. above:
+ - For element 0, we shift left by 8 since it's on the right
+ - For element 1, we need not shift
+ */
+ dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60));
+
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC);
+
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ // - Number 4. is not needed for the doubleword as the value is 64-bits
+ dag LE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* LE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+ - Convert 32-bit float to a 64-bit single precision float
+ */
+ dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61));
+ dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
+ dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);
+
+ /* LE variable double
+ Same as the LE doubleword except there is no move.
+ */
+ dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
+ (COPY_TO_REGCLASS $S, VRRC),
+ LE_VDWORD_PERM_VEC);
+ dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);
+
+ /* BE variable byte
+ The algorithm here is the same as the LE variable byte except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x8
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-7
+ */
+ dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8));
+ dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC);
+ dag BE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT),
+ sub_32);
+
+ /* BE variable halfword
+ The algorithm here is the same as the LE variable halfword except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x4 and multiply by 2
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-3
+ */
+ dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62));
+ dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC);
+ dag BE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT),
+ sub_32);
+
+ /* BE variable word
+ The algorithm is the same as the LE variable word except:
+ - The shift in the VMX register happens for opposite element numbers
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-1
+ */
+ dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61));
+ dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC);
+ dag BE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT),
+ sub_32);
+
+ /* BE variable doubleword
+ Same as the LE doubleword except we shift in the VMX register for opposite
+ element indices.
+ */
+ dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60));
+ dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC);
+ dag BE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
+
+ /* BE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+ - Convert 32-bit float to a 64-bit single precision float
+ */
+ dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61));
+ dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
+ dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);
+
+ /* BE variable double
+ Same as the BE doubleword except there is no move.
+ */
+ dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC),
+ (COPY_TO_REGCLASS $S, VRRC),
+ BE_VDWORD_PERM_VEC);
+ dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
+}
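
A scalar model of the variable-index byte extraction defined above (illustrative only): steps 1-3 amount to selecting the doubleword that holds element Idx and moving it to a 64-bit integer, and step 4 shifts right by (Idx & 7) * 8 bits. Only the LE element numbering is modeled here; the BE patterns differ in which doubleword is selected and in how the index is inverted.

#include <cassert>
#include <cstdint>

// LE element numbering: byte i of the vector is element i.
// Step 1/2: select the doubleword containing the element (the permute).
// Step 3:   move that doubleword to a GPR (the MFVSRD).
// Step 4:   shift right by (Idx & 7) * 8 within the GPR.
static uint8_t extractByteLE(const uint8_t V[16], unsigned Idx) {
  uint64_t DW = 0;
  unsigned Base = (Idx & 8) ? 8 : 0;          // which doubleword holds Idx
  for (unsigned i = 0; i < 8; ++i)
    DW |= (uint64_t)V[Base + i] << (8 * i);   // little-endian "move to GPR"
  return (uint8_t)(DW >> ((Idx & 7) * 8));
}

int main() {
  uint8_t V[16];
  for (unsigned i = 0; i < 16; ++i) V[i] = (uint8_t)(0x10 + i);
  for (unsigned i = 0; i < 16; ++i)
    assert(extractByteLE(V, i) == 0x10 + i);
  return 0;
}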
+
+// v4f32 scalar <-> vector conversions (BE)
+let Predicates = [IsBigEndian, HasP8Vector] in {
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XSCVDPSPN $A))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN $S))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
+} // IsBigEndian, HasP8Vector
+
+// Variable index vector_extract for v2f64 does not require P8Vector
+let Predicates = [IsBigEndian, HasVSX] in
+ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
+
+let Predicates = [IsBigEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (BE)
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
+
+ // v8i16 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (BE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.BE_VARIABLE_WORD)>;
+
+ // v2i64 scalar <-> vector conversions (BE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
+} // IsBigEndian, HasDirectMove
+
+// v4f32 scalar <-> vector conversions (LE)
+let Predicates = [IsLittleEndian, HasP8Vector] in {
+ def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN $S))>;
+ def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
+} // IsLittleEndian, HasP8Vector
+
+// Variable index vector_extract for v2f64 does not require P8Vector
+let Predicates = [IsLittleEndian, HasVSX] in
+ def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+
+let Predicates = [IsLittleEndian, HasDirectMove] in {
+ // v16i8 scalar <-> vector conversions (LE)
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 MovesToVSR.LE_WORD_0)>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 MovesToVSR.LE_DWORD_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+ def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
+
+ // v8i16 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+ def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_HALF)>;
+
+ // v4i32 scalar <-> vector conversions (LE)
+ def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+ def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 VectorExtractions.LE_VARIABLE_WORD)>;
+
+ // v2i64 scalar <-> vector conversions (LE)
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
+ (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
+} // IsLittleEndian, HasDirectMove
+
+let Predicates = [HasDirectMove, HasVSX] in {
+// bitconvert f32 -> i32
+// (convert to 32-bit fp single, shift right 1 word, move to GPR)
+def : Pat<(i32 (bitconvert f32:$S)),
+ (i32 (MFVSRWZ (EXTRACT_SUBREG
+ (XXSLDWI (XSCVDPSPN $S),(XSCVDPSPN $S), 3),
+ sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert to 64-bit fp single)
+def : Pat<(f32 (bitconvert i32:$A)),
+ (f32 (XSCVSPDPN
+ (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
+
+// bitconvert f64 -> i64
+// (move to GPR, nothing else needed)
+def : Pat<(i64 (bitconvert f64:$S)),
+ (i64 (MFVSRD $S))>;
+
+// bitconvert i64 -> f64
+// (move to FPR, nothing else needed)
+def : Pat<(f64 (bitconvert i64:$S)),
+ (f64 (MTVSRD $S))>;
+}
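
The bitconvert patterns above only reinterpret bits; the instruction sequences are the register-file way of doing what a byte-wise copy does in source code. A small, target-independent sketch of the intended semantics:

#include <cassert>
#include <cstdint>
#include <cstring>

// bitconvert: same bits, different type -- no value conversion involved.
static uint32_t f32ToBits(float F) {
  uint32_t B;
  std::memcpy(&B, &F, sizeof(B));
  return B;
}

static float bitsToF32(uint32_t B) {
  float F;
  std::memcpy(&F, &B, sizeof(F));
  return F;
}

int main() {
  assert(f32ToBits(1.0f) == 0x3f800000u);
  assert(bitsToF32(0x3f800000u) == 1.0f);
  return 0;
}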
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index b4e1c09..e3a35d5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -71,10 +72,10 @@ namespace {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
// FIXME: For some reason, preserving SE here breaks LSR (even if
// this pass changes nothing).
- // AU.addPreserved<ScalarEvolution>();
+ // AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -96,7 +97,7 @@ INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch",
"PPC Loop Data Prefetch", false, false)
@@ -104,7 +105,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref
bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DL = &F.getParent()->getDataLayout();
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index b6e7799..5e18826 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -73,7 +73,7 @@ namespace {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolution>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
}
bool runOnFunction(Function &F) override;
@@ -84,8 +84,10 @@ namespace {
private:
PPCTargetMachine *TM;
+ DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
+ bool PreserveLCSSA;
};
}
@@ -93,7 +95,7 @@ char PPCLoopPreIncPrep::ID = 0;
static const char *name = "Prepare loop for pre-inc. addressing modes";
INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false)
FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
@@ -101,17 +103,20 @@ FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
}
namespace {
- struct SCEVLess : std::binary_function<const SCEV *, const SCEV *, bool>
- {
- SCEVLess(ScalarEvolution *SE) : SE(SE) {}
+ struct BucketElement {
+ BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
- bool operator() (const SCEV *X, const SCEV *Y) const {
- const SCEV *Diff = SE->getMinusSCEV(X, Y);
- return cast<SCEVConstant>(Diff)->getValue()->getSExtValue() < 0;
- }
+ const SCEVConstant *Offset;
+ Instruction *Instr;
+ };
- protected:
- ScalarEvolution *SE;
+ struct Bucket {
+ Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
+ Elements(1, BucketElement(I)) {}
+
+ const SCEV *BaseSCEV;
+ SmallVector<BucketElement, 16> Elements;
};
}
@@ -140,7 +145,10 @@ static Value *GetPointerOperand(Value *MemI) {
bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolution>();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
bool MadeChange = false;
@@ -169,7 +177,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
std::distance(pred_begin(Header), pred_end(Header));
// Collect buckets of comparable addresses used by loads and stores.
- typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
SmallVector<Bucket, 16> Buckets;
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
I != IE; ++I) {
@@ -212,25 +219,24 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
}
bool FoundBucket = false;
- for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
- for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end();
- K != KE; ++K) {
- const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV);
- if (isa<SCEVConstant>(Diff)) {
- Buckets[i].insert(std::make_pair(LSCEV, MemI));
- FoundBucket = true;
- break;
- }
+ for (auto &B : Buckets) {
+ const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
+ if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
+ B.Elements.push_back(BucketElement(CDiff, MemI));
+ FoundBucket = true;
+ break;
}
+ }
if (!FoundBucket) {
- Buckets.push_back(Bucket(SCEVLess(SE)));
- Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI));
+ if (Buckets.size() == MaxVars)
+ return MadeChange;
+ Buckets.push_back(Bucket(LSCEV, MemI));
}
}
}
- if (Buckets.empty() || Buckets.size() > MaxVars)
+ if (Buckets.empty())
return MadeChange;
BasicBlock *LoopPredecessor = L->getLoopPredecessor();
@@ -239,7 +245,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// iteration space), insert a new preheader for the loop.
if (!LoopPredecessor ||
!LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
- LoopPredecessor = InsertPreheaderForLoop(L, this);
+ LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA);
if (LoopPredecessor)
MadeChange = true;
}
@@ -253,8 +259,45 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// The base address of each bucket is transformed into a phi and the others
// are rewritten as offsets of that variable.
+ // We have a choice now of which instruction's memory operand we use as the
+ // base for the generated PHI. Always picking the first instruction in each
+ // bucket does not work well, specifically because that instruction might
+ // be a prefetch (and there are no pre-increment dcbt variants). Otherwise,
+ // the choice is somewhat arbitrary, because the backend will happily
+ // generate direct offsets from both the pre-incremented and
+ // post-incremented pointer values. Thus, we'll pick the first non-prefetch
+ // instruction in each bucket, and adjust the recurrence and other offsets
+ // accordingly.
+ for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) {
+ if (auto *II = dyn_cast<IntrinsicInst>(Buckets[i].Elements[j].Instr))
+ if (II->getIntrinsicID() == Intrinsic::prefetch)
+ continue;
+
+ // If we'd otherwise pick the first element anyway, there's nothing to do.
+ if (j == 0)
+ break;
+
+ // If our chosen element has no offset from the base pointer, there's
+ // nothing to do.
+ if (!Buckets[i].Elements[j].Offset ||
+ Buckets[i].Elements[j].Offset->isZero())
+ break;
+
+ const SCEV *Offset = Buckets[i].Elements[j].Offset;
+ Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset);
+ for (auto &E : Buckets[i].Elements) {
+ if (E.Offset)
+ E.Offset = cast<SCEVConstant>(SE->getMinusSCEV(E.Offset, Offset));
+ else
+ E.Offset = cast<SCEVConstant>(SE->getNegativeSCEV(Offset));
+ }
+
+ std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]);
+ break;
+ }
+
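
A scalar model of the rebasing above (plain integers standing in for the SCEV offsets, purely illustrative): promoting element j to the base adds j's offset to the bucket base and subtracts it from every element's offset, so each element still addresses base + offset as before.

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

struct Elt { long Offset; bool IsPrefetch; };

// Pick the first non-prefetch element as the new base and rebase offsets,
// mirroring the loop in runOnLoop with integers instead of SCEVs.
static void rebase(long &Base, std::vector<Elt> &Elts) {
  for (size_t j = 0; j != Elts.size(); ++j) {
    if (Elts[j].IsPrefetch)
      continue;
    if (j == 0 || Elts[j].Offset == 0)
      break;
    long Off = Elts[j].Offset;
    Base += Off;
    for (Elt &E : Elts)
      E.Offset -= Off;
    std::swap(Elts[j], Elts[0]);
    break;
  }
}

int main() {
  long Base = 100;
  std::vector<Elt> Elts = {{0, true}, {8, false}, {16, false}};
  rebase(Base, Elts);
  // Every element still addresses Base + Offset == its original address.
  assert(Base + Elts[0].Offset == 108 && Base + Elts[1].Offset == 100);
  assert(Base + Elts[2].Offset == 116);
  return 0;
}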
const SCEVAddRecExpr *BasePtrSCEV =
- cast<SCEVAddRecExpr>(Buckets[i].begin()->first);
+ cast<SCEVAddRecExpr>(Buckets[i].BaseSCEV);
if (!BasePtrSCEV->isAffine())
continue;
@@ -262,7 +305,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
assert(BasePtrSCEV->getLoop() == L &&
"AddRec for the wrong loop?");
- Instruction *MemI = Buckets[i].begin()->second;
+ // The instruction corresponding to the Bucket's BaseSCEV must be the first
+ // in the vector of elements.
+ Instruction *MemI = Buckets[i].Elements.begin()->Instr;
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
@@ -302,7 +347,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
}
- Instruction *InsPoint = Header->getFirstInsertionPt();
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
GetElementPtrInst *PtrInc = GetElementPtrInst::Create(
I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
@@ -327,18 +372,20 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
BasePtr->replaceAllUsesWith(NewBasePtr);
RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
- Value *LastNewPtr = NewBasePtr;
- for (Bucket::iterator I = std::next(Buckets[i].begin()),
- IE = Buckets[i].end(); I != IE; ++I) {
- Value *Ptr = GetPointerOperand(I->second);
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+ NewPtrs.insert( NewBasePtr);
+
+ for (auto I = std::next(Buckets[i].Elements.begin()),
+ IE = Buckets[i].Elements.end(); I != IE; ++I) {
+ Value *Ptr = GetPointerOperand(I->Instr);
assert(Ptr && "No pointer operand");
- if (Ptr == LastNewPtr)
+ if (NewPtrs.count(Ptr))
continue;
Instruction *RealNewPtr;
- const SCEVConstant *Diff =
- cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV));
- if (Diff->isZero()) {
+ if (!I->Offset || I->Offset->getValue()->isZero()) {
RealNewPtr = NewBasePtr;
} else {
Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
@@ -346,13 +393,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
PtrIP = 0;
else if (isa<PHINode>(PtrIP))
- PtrIP = PtrIP->getParent()->getFirstInsertionPt();
+ PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
else if (!PtrIP)
- PtrIP = I->second;
+ PtrIP = I->Instr;
GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
- I8Ty, PtrInc, Diff->getValue(),
- I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
+ I8Ty, PtrInc, I->Offset->getValue(),
+ I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP);
if (!PtrIP)
NewPtr->insertAfter(cast<Instruction>(PtrInc));
NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
@@ -373,7 +420,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
Ptr->replaceAllUsesWith(ReplNewPtr);
RecursivelyDeleteTriviallyDeadInstructions(Ptr);
- LastNewPtr = RealNewPtr;
+ NewPtrs.insert(RealNewPtr);
}
MadeChange = true;
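
The re-basing loop added above picks a non-prefetch element of each bucket as the new base and rewrites every other element's offset relative to it. A minimal sketch of that arithmetic, with SCEV expressions reduced to plain integers and all names chosen only for illustration (this is not the pass's code):

    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Element { int64_t Offset; };                  // instruction omitted
    struct Bucket  { int64_t BaseSCEV; std::vector<Element> Elements; };

    // Make element J the new bucket base: fold its offset into BaseSCEV,
    // re-express every element relative to it, and move it to the front so
    // the pre-increment address is formed from it.
    static void rebaseBucket(Bucket &B, std::size_t J) {
      int64_t Off = B.Elements[J].Offset;
      if (J == 0 || Off == 0)
        return;                                 // mirrors the early breaks
      B.BaseSCEV += Off;                        // SE->getAddExpr(Base, Off)
      for (Element &E : B.Elements)
        E.Offset -= Off;                        // SE->getMinusSCEV(E, Off)
      std::swap(B.Elements[J], B.Elements[0]);
    }
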
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 76837ec..44a692d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -38,7 +38,7 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
const TargetMachine &TM = AP.TM;
Mangler *Mang = AP.Mang;
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = AP.getDataLayout();
MCContext &Ctx = AP.OutContext;
bool isDarwin = TM.getTargetTriple().isOSDarwin();
@@ -51,13 +51,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
Suffix = "$non_lazy_ptr";
if (!Suffix.empty())
- Name += DL->getPrivateGlobalPrefix();
+ Name += DL.getPrivateGlobalPrefix();
unsigned PrefixLen = Name.size();
if (!MO.isGlobal()) {
assert(MO.isSymbol() && "Isn't a symbol reference");
- Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
} else {
const GlobalValue *GV = MO.getGlobal();
TM.getNameWithPrefix(Name, GV, *Mang);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
new file mode 100644
index 0000000..fe339d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -0,0 +1,230 @@
+//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// This pass performs peephole optimizations to clean up ugly code
+// sequences at the MachineInstruction layer. It runs at the end of
+// the SSA phases, following VSX swap removal. A pass of dead code
+// elimination follows this one for quick clean-up of any dead
+// instructions introduced here. Although we could do this as callbacks
+// from the generic peephole pass, this would have a couple of bad
+// effects: it might remove optimization opportunities for VSX swap
+// removal, and it would miss cleanups made possible following VSX
+// swap removal.
+//
+//===---------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "PPC.h"
+#include "PPCInstrBuilder.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-mi-peepholes"
+
+namespace llvm {
+ void initializePPCMIPeepholePass(PassRegistry&);
+}
+
+namespace {
+
+struct PPCMIPeephole : public MachineFunctionPass {
+
+ static char ID;
+ const PPCInstrInfo *TII;
+ MachineFunction *MF;
+ MachineRegisterInfo *MRI;
+
+ PPCMIPeephole() : MachineFunctionPass(ID) {
+ initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
+ }
+
+private:
+ // Initialize class variables.
+ void initialize(MachineFunction &MFParm);
+
+ // Perform peepholes.
+ bool simplifyCode(void);
+
+ // Find the "true" register represented by SrcReg (following chains
+ // of copies and subreg_to_reg operations).
+ unsigned lookThruCopyLike(unsigned SrcReg);
+
+public:
+ // Main entry point for this pass.
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ initialize(MF);
+ return simplifyCode();
+ }
+};
+
+// Initialize class variables.
+void PPCMIPeephole::initialize(MachineFunction &MFParm) {
+ MF = &MFParm;
+ MRI = &MF->getRegInfo();
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
+ DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
+ DEBUG(MF->dump());
+}
+
+// Perform peephole optimizations.
+bool PPCMIPeephole::simplifyCode(void) {
+ bool Simplified = false;
+ MachineInstr* ToErase = nullptr;
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineInstr &MI : MBB) {
+
+ // If the previous instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ // Ignore debug instructions.
+ if (MI.isDebugValue())
+ continue;
+
+ // Per-opcode peepholes.
+ switch (MI.getOpcode()) {
+
+ default:
+ break;
+
+ case PPC::XXPERMDI: {
+ // Perform simplifications of 2x64 vector swaps and splats.
+ // A swap is identified by an immediate value of 2, and a splat
+ // is identified by an immediate value of 0 or 3.
+ int Immed = MI.getOperand(3).getImm();
+
+ if (Immed != 1) {
+
+ // For each of these simplifications, we need the two source
+ // regs to match. Unfortunately, MachineCSE ignores COPY and
+ // SUBREG_TO_REG, so for example we can see
+ // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
+ // We have to look through chains of COPY and SUBREG_TO_REG
+ // to find the real source values for comparison.
+ unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
+ unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
+
+ if (TrueReg1 == TrueReg2
+ && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+
+ // If this is a splat or a swap fed by another splat, we
+ // can replace it with a copy.
+ if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+ unsigned FeedImmed = DefMI->getOperand(3).getImm();
+ unsigned FeedReg1
+ = lookThruCopyLike(DefMI->getOperand(1).getReg());
+ unsigned FeedReg2
+ = lookThruCopyLike(DefMI->getOperand(2).getReg());
+
+ if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs()
+ << "Optimizing splat/swap or splat/splat "
+ "to splat/copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+
+ // If this is a splat fed by a swap, we can modify the splat
+ // to splat the other value from the swap's input parameter.
+ else if ((Immed == 0 || Immed == 3)
+ && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
+ DEBUG(MI.dump());
+ MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
+ MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
+ MI.getOperand(3).setImm(3 - Immed);
+ Simplified = true;
+ }
+
+ // If this is a swap fed by a swap, we can replace it
+ // with a copy from the first swap's input.
+ else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
+ DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(DefMI->getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // If the last instruction was marked for elimination,
+ // remove it now.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+ }
+
+ return Simplified;
+}
+
+// This is used to find the "true" source register for an
+// XXPERMDI instruction, since MachineCSE does not handle the
+// "copy-like" operations (Copy and SubregToReg). Returns
+// the original SrcReg unless it is the target of a copy-like
+// operation, in which case we chain backwards through all
+// such operations to the ultimate source register. If a
+// physical register is encountered, we stop the search.
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
+
+ while (true) {
+
+ MachineInstr *MI = MRI->getVRegDef(SrcReg);
+ if (!MI->isCopyLike())
+ return SrcReg;
+
+ unsigned CopySrcReg;
+ if (MI->isCopy())
+ CopySrcReg = MI->getOperand(1).getReg();
+ else {
+ assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
+ CopySrcReg = MI->getOperand(2).getReg();
+ }
+
+ if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ return CopySrcReg;
+
+ SrcReg = CopySrcReg;
+ }
+}
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
+ "PowerPC MI Peephole Optimization", false, false)
+
+char PPCMIPeephole::ID = 0;
+FunctionPass*
+llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
+
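
The XXPERMDI peepholes above rely on the instruction's doubleword-select semantics: with both sources equal, immediate 2 is a swap, 0 and 3 are splats, and 1 is the identity (which is why the pass skips Immed == 1). A small model that checks those identities, including the swap-fed-splat rewrite to 3 - Immed; plain arrays stand in for vector registers here, so this is an illustration of the semantics rather than the pass's code:

    #include <array>
    #include <cassert>
    #include <cstdint>

    using V2x64 = std::array<uint64_t, 2>;

    // XXPERMDI XT, XA, XB, DM: bit 1 of DM selects the doubleword taken from
    // XA, bit 0 selects the doubleword taken from XB.
    static V2x64 xxpermdi(V2x64 A, V2x64 B, unsigned DM) {
      return { A[(DM >> 1) & 1], B[DM & 1] };
    }

    int main() {
      V2x64 S = {1, 2};
      assert((xxpermdi(S, S, 2) == V2x64{2, 1}));   // swap
      assert((xxpermdi(S, S, 0) == V2x64{1, 1}));   // splat of doubleword 0
      assert((xxpermdi(S, S, 3) == V2x64{2, 2}));   // splat of doubleword 1
      // A splat (imm 0) fed by a swap equals a splat (imm 3) of the swap's
      // input, which is the "swap/splat => splat" rewrite with imm = 3 - Immed.
      V2x64 Swapped = xxpermdi(S, S, 2);
      assert((xxpermdi(Swapped, Swapped, 0) == xxpermdi(S, S, 3)));
      return 0;
    }
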
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index ec4e0a5..95f1631 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -18,8 +18,8 @@ using namespace llvm;
void PPCFunctionInfo::anchor() { }
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
- const DataLayout *DL = MF.getTarget().getDataLayout();
- return MF.getContext().getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) +
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
Twine(MF.getFunctionNumber()) +
"$poff");
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 2b09b2f..934bdf6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -200,7 +200,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R2); // System-reserved register
Reserved.set(PPC::R13); // Small Data Area pointer register
}
-
+
// On PPC64, r13 is the thread pointer. Never allocate this register.
if (TM.isPPC64()) {
Reserved.set(PPC::R13);
@@ -262,7 +262,7 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
default:
return 0;
case PPC::G8RC_NOX0RegClassID:
- case PPC::GPRC_NOR0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
case PPC::G8RCRegClassID:
case PPC::GPRCRegClassID: {
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
@@ -311,7 +311,7 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
//===----------------------------------------------------------------------===//
/// lowerDynamicAlloc - Generate the code for allocating an object in the
-/// current frame. The sequence of code with be in the general form
+/// current frame. The sequence of code will be in the general form
///
/// addi R0, SP, \#frameSize ; get the address of the previous frame
/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
@@ -337,7 +337,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
// Get the total frame size.
unsigned FrameSize = MFI->getStackSize();
-
+
// Get stack alignments.
const PPCFrameLowering *TFI = getFrameLowering(MF);
unsigned TargetAlign = TFI->getStackAlignment();
@@ -347,14 +347,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
- // previous frame's address from 0(SP). Why not do an addis of the hi?
- // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
- // Constructing the constant and adding would take 3 instructions.
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
// Fortunately, a frame greater than 32K is rare.
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
-
+
if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
@@ -425,11 +425,32 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
.addReg(PPC::R1)
.addImm(maxCallFrameSize);
}
-
+
// Discard the DYNALLOC instruction.
MBB.erase(II);
}
+void PPCRegisterInfo::lowerDynamicAreaOffset(
+ MachineBasicBlock::iterator II) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ DebugLoc dl = MI.getDebugLoc();
+ BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg())
+ .addImm(maxCallFrameSize);
+ MBB.erase(II);
+}
+
/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
/// reserving a whole register (R0), we scrounge for one here. This generates
/// code like this:
@@ -459,8 +480,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0) {
@@ -549,8 +570,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
- .addReg(getCRFromCRBit(SrcReg));
-
+ .addReg(getCRFromCRBit(SrcReg));
+
// If the saved register wasn't CR0LT, shift the bits left so that the bit to
// store is the first one. Mask all but that bit.
unsigned Reg1 = Reg;
@@ -602,17 +623,19 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
unsigned ShiftBits = getEncodingValue(DestReg);
// rlwimi r11, r10, 32-ShiftBits, ..., ...
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO)
- .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill)
- .addImm(ShiftBits ? 32-ShiftBits : 0)
- .addImm(ShiftBits).addImm(ShiftBits);
-
+ .addReg(RegO, RegState::Kill)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ShiftBits ? 32 - ShiftBits : 0)
+ .addImm(ShiftBits)
+ .addImm(ShiftBits);
+
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF),
getCRFromCRBit(DestReg))
- .addReg(RegO, RegState::Kill)
- // Make sure we have a use dependency all the way through this
- // sequence of instructions. We can't have the other bits in the CR
- // modified in between the mfocrf and the mtocrf.
- .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
+ .addReg(RegO, RegState::Kill)
+ // Make sure we have a use dependency all the way through this
+ // sequence of instructions. We can't have the other bits in the CR
+ // modified in between the mfocrf and the mtocrf.
+ .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -634,11 +657,11 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
unsigned SrcReg = MI.getOperand(0).getReg();
BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
- .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
-
- addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
- .addReg(Reg, RegState::Kill),
- FrameIndex);
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(
+ BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill),
+ FrameIndex);
// Discard the pseudo instruction.
MBB.erase(II);
@@ -671,9 +694,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-bool
-PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
- unsigned Reg, int &FrameIdx) const {
+bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
// ABI, return true to prevent allocating an additional frame slot.
@@ -752,7 +774,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FPSI = FI->getFramePointerSaveIndex();
// Get the instruction opcode.
unsigned OpC = MI.getOpcode();
-
+
+ if ((OpC == PPC::DYNAREAOFFSET || OpC == PPC::DYNAREAOFFSET8)) {
+ lowerDynamicAreaOffset(II);
+ return;
+ }
+
// Special case for dynamic alloca.
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
@@ -800,8 +827,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If we're not using a Frame Pointer that has been set to the value of the
// SP before having the stack size subtracted from it, then add the stack size
// to Offset to get the correct offset.
- // Naked functions have stack size 0, although getStackSize may not reflect that
- // because we didn't call all the pieces that compute it for naked functions.
+ // Naked functions have stack size 0, although getStackSize may not reflect
+ // that because we didn't call all the pieces that compute it for naked
+ // functions.
if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
if (!(hasBasePointer(MF) && FrameIndex < 0))
Offset += MFI->getStackSize();
@@ -840,7 +868,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
.addImm(Offset);
// Convert into indexed form of the instruction:
- //
+ //
// sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
// addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
unsigned OperandBase;
@@ -898,24 +926,6 @@ bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return needsStackRealignment(MF);
}
-bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
- return false;
-
- return true;
-}
-
-bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const PPCFrameLowering *TFI = getFrameLowering(MF);
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
-}
-
/// Returns true if the instruction's frame index
/// reference would be better served by a base register other than FP
/// or SP. Used by LocalStackFrameAllocation to determine which frame index
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index d304e1d..b15fde8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -54,13 +54,13 @@ inline static unsigned getCRFromCRBit(unsigned SrcReg) {
return Reg;
}
-
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
const PPCTargetMachine &TM;
+
public:
PPCRegisterInfo(const PPCTargetMachine &TM);
-
+
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
@@ -77,7 +77,7 @@ public:
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
- const uint32_t *getNoPreservedMask() const;
+ const uint32_t *getNoPreservedMask() const override;
void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
@@ -101,6 +101,7 @@ public:
}
void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const;
void lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex) const;
void lowerCRRestore(MachineBasicBlock::iterator II,
@@ -115,9 +116,9 @@ public:
unsigned FrameIndex) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
- int &FrameIdx) const override;
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
+ int &FrameIdx) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
// Support for virtual base registers.
@@ -136,8 +137,6 @@ public:
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 58dacca..c0fcb6c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -62,6 +62,7 @@ void PPCSubtarget::initializeEnvironment() {
Has64BitSupport = false;
Use64BitRegs = false;
UseCRBits = false;
+ UseSoftFloat = false;
HasAltivec = false;
HasSPE = false;
HasQPX = false;
@@ -100,6 +101,8 @@ void PPCSubtarget::initializeEnvironment() {
HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
+ HasFusion = false;
+ HasFloat128 = false;
}
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -210,5 +213,33 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
+unsigned char PPCSubtarget::classifyGlobalReference(
+ const GlobalValue *GV) const {
+ // Note that currently we don't generate non-pic references.
+ // If a caller wants that, this will have to be updated.
+
+ // Large code model always uses the TOC even for local symbols.
+ if (TM.getCodeModel() == CodeModel::Large)
+ return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
+
+ unsigned char flags = PPCII::MO_PIC_FLAG;
+
+ // Only if the relocation mode is PIC do we have to worry about
+ // interposition. In all other cases we can use a slightly looser standard to
+ // decide how to access the symbol.
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // If it's local, or it's non-default, it can't be interposed.
+ if (!GV->hasLocalLinkage() &&
+ GV->hasDefaultVisibility()) {
+ flags |= PPCII::MO_NLP_FLAG;
+ }
+ return flags;
+ }
+
+ if (GV->isStrongDefinitionForLinker())
+ return flags;
+ return flags | PPCII::MO_NLP_FLAG;
+}
+
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
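
classifyGlobalReference reduces to a small decision tree over the code model, the relocation model, and the symbol's linkage and visibility. A hedged sketch of that tree, with the inputs flattened to booleans and the flag values invented for illustration (they are not the real PPCII encodings):

    // Illustrative flag values only, not the real PPCII encodings.
    enum : unsigned char { PIC_FLAG = 1, NLP_FLAG = 2 };

    static unsigned char classifyReference(bool LargeCodeModel, bool PICReloc,
                                           bool LocalLinkage,
                                           bool DefaultVisibility,
                                           bool StrongDefinition) {
      // Large code model: always indirect through the TOC, even for locals.
      if (LargeCodeModel)
        return PIC_FLAG | NLP_FLAG;

      unsigned char Flags = PIC_FLAG;

      // Interposition only matters under PIC relocation: a non-local symbol
      // with default visibility may be preempted, so reference it indirectly.
      if (PICReloc) {
        if (!LocalLinkage && DefaultVisibility)
          Flags |= NLP_FLAG;
        return Flags;
      }

      // Otherwise a strong definition for the linker is referenced directly.
      if (!StrongDefinition)
        Flags |= NLP_FLAG;
      return Flags;
    }
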
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 0616c1f..4f5c95c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -83,6 +83,7 @@ protected:
bool Has64BitSupport;
bool Use64BitRegs;
bool UseCRBits;
+ bool UseSoftFloat;
bool IsPPC64;
bool HasAltivec;
bool HasSPE;
@@ -119,6 +120,8 @@ protected:
bool HasPartwordAtomics;
bool HasDirectMove;
bool HasHTM;
+ bool HasFusion;
+ bool HasFloat128;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
/// alignment has not been changed, we need to keep the 16-byte alignment
@@ -188,6 +191,8 @@ public:
/// has64BitSupport - Return true if the selected CPU supports 64-bit
/// instructions, regardless of whether we are in 32-bit or 64-bit mode.
bool has64BitSupport() const { return Has64BitSupport; }
+ // useSoftFloat - Return true if soft-float option is turned on.
+ bool useSoftFloat() const { return UseSoftFloat; }
/// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
/// registers in 32-bit mode when possible. This can only be true if
@@ -254,6 +259,8 @@ public:
return 16;
}
bool hasHTM() const { return HasHTM; }
+ bool hasFusion() const { return HasFusion; }
+ bool hasFloat128() const { return HasFloat128; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -285,6 +292,10 @@ public:
bool useAA() const override;
bool enableSubRegLiveness() const override;
+
+ /// classifyGlobalReference - Classify a global variable reference for the
+ /// current subtarget according to how we should reference it.
+ unsigned char classifyGlobalReference(const GlobalValue *GV) const;
};
} // End llvm namespace
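
The target-machine hunk below keys the per-function subtarget cache on CPU+FS, so the soft-float attribute has to be folded into the feature string before the lookup. A minimal sketch of that keying; the struct and function names are placeholders, not the real classes:

    #include <map>
    #include <memory>
    #include <string>

    struct FakeSubtarget { std::string CPU, FS; };    // placeholder only

    static FakeSubtarget *getSubtargetFor(std::string CPU, std::string FS,
                                          bool UseSoftFloat) {
      static std::map<std::string, std::unique_ptr<FakeSubtarget>> SubtargetMap;

      // Two functions that differ only in "use-soft-float" must not share a
      // subtarget, so the attribute is reflected in the feature string and
      // therefore in the cache key.
      if (UseSoftFloat)
        FS += FS.empty() ? "+soft-float" : ",+soft-float";

      auto &I = SubtargetMap[CPU + FS];
      if (!I)
        I.reset(new FakeSubtarget{CPU, FS});
      return I.get();
    }
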
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1daf244..d24b590 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -42,6 +42,10 @@ static cl::
opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
cl::desc("Disable VSX Swap Removal for PPC"));
+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+ cl::desc("Disable machine peepholes for PPC"));
+
static cl::opt<bool>
EnableGEPOpt("ppc-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
@@ -57,11 +61,19 @@ EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
cl::desc("Add extra TOC register dependencies"),
cl::init(true), cl::Hidden);
+static cl::opt<bool>
+EnableMachineCombinerPass("ppc-machine-combiner",
+ cl::desc("Enable the machine combiner pass"),
+ cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
+
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializePPCBoolRetToIntPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -118,7 +130,7 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
}
if (OL != CodeGenOpt::None) {
- if (!FullFS.empty())
+ if (!FullFS.empty())
FullFS = "+invariant-function-descriptors," + FullFS;
else
FullFS = "+invariant-function-descriptors";
@@ -144,7 +156,7 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
return PPCTargetMachine::PPC_ABI_ELFv2;
assert(Options.MCOptions.getABIName().empty() &&
- "Unknown target-abi option!");
+ "Unknown target-abi option!");
if (!TT.isMacOSX()) {
switch (TT.getArch()) {
@@ -160,9 +172,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
return PPCTargetMachine::PPC_ABI_UNKNOWN;
}
-// The FeatureString here is a little subtle. We are modifying the feature string
-// with what are (currently) non-function specific overrides as it goes into the
-// LLVMTargetMachine constructor and then using the stored value in the
+// The FeatureString here is a little subtle. We are modifying the feature
+// string with what are (currently) non-function specific overrides as it goes
+// into the LLVMTargetMachine constructor and then using the stored value in the
// Subtarget constructor below it.
PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -227,6 +239,19 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
? FSAttr.getValueAsString().str()
: TargetFS;
+ // FIXME: This is related to the code below that resets the target options;
+ // we need to know whether or not the soft float flag is set on the
+ // function before we can generate a subtarget. We also need to use
+ // it as a key for the subtarget since that can be the only difference
+ // between two functions.
+ bool SoftFloat =
+ F.hasFnAttribute("use-soft-float") &&
+ F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+ // If the soft float attribute is set on the function turn on the soft float
+ // subtarget feature.
+ if (SoftFloat)
+ FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
auto &I = SubtargetMap[CPU + FS];
if (!I) {
// This needs to be done before we create a new subtarget since any
@@ -277,6 +302,8 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void PPCPassConfig::addIRPasses() {
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCBoolRetToIntPass());
addPass(createAtomicExpandPass(&getPPCTargetMachine()));
// For the BG/Q (or if explicitly requested), add explicit data prefetch
@@ -316,6 +343,10 @@ bool PPCPassConfig::addPreISel() {
bool PPCPassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
+
+ if (EnableMachineCombinerPass)
+ addPass(&MachineCombinerID);
+
return true;
}
@@ -339,6 +370,12 @@ void PPCPassConfig::addMachineSSAOptimization() {
if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
!DisableVSXSwapRemoval)
addPass(createPPCVSXSwapRemovalPass());
+ // Target-specific peephole cleanups performed after instruction
+ // selection.
+ if (!DisableMIPeephole) {
+ addPass(createPPCMIPeepholePass());
+ addPass(&DeadMachineInstructionElimID);
+ }
}
void PPCPassConfig::addPreRegAlloc() {
@@ -364,6 +401,7 @@ void PPCPassConfig::addPreEmitPass() {
}
TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(PPCTTIImpl(this, F));
+ });
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 9ee5db9..798bb9d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -42,9 +42,7 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
if (Kind.isReadOnly()) {
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (GVar && GVar->isConstant() &&
- (GVar->getInitializer()->getRelocationInfo() ==
- Constant::GlobalRelocations))
+ if (GVar && GVar->isConstant() && GVar->getInitializer()->needsRelocation())
Kind = SectionKind::getReadOnlyWithRel();
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e21c2b7..cd86dab 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -35,7 +35,7 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(Imm, Ty);
@@ -64,8 +64,8 @@ unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}
-unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
@@ -98,8 +98,8 @@ unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return PPCTTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
if (DisablePPCConstHoist)
return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
@@ -197,9 +197,20 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
}
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+ // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
+ // on combining the loads generated for consecutive accesses, and failure to
+ // do so is particularly expensive. Aggressive unrolling makes such combining
+ // much more likely (compared to only using concatenation unrolling).
+ if (ST->getDarwinDirective() == PPC::DIR_A2)
+ return true;
+
return LoopHasReductions;
}
+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+ return true;
+}
+
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
return 0;
@@ -246,7 +257,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
-unsigned PPCTTIImpl::getArithmeticInstrCost(
+int PPCTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
@@ -257,24 +268,30 @@ unsigned PPCTTIImpl::getArithmeticInstrCost(
Opd1PropInfo, Opd2PropInfo);
}
-unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+ // PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). We need one such shuffle instruction for each actual
+ // register (this is not true for arbitrary shuffles, but is true for the
+ // structured types of shuffles covered by TTI::ShuffleKind).
+ return LT.first;
}
-unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) {
+int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -313,41 +330,83 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
-unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
- unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
-
- // VSX loads/stores support unaligned access.
- if (ST->hasVSX()) {
- if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
- return Cost;
- }
+ int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
- bool UnalignedAltivec =
- Src->isVectorTy() &&
- Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
- LT.second.getSizeInBits() == 128 &&
- Opcode == Instruction::Load;
+ // Aligned loads and stores are easy.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+ return Cost;
+
+ bool IsAltivecType = ST->hasAltivec() &&
+ (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
+ LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
+ bool IsVSXType = ST->hasVSX() &&
+ (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
+ bool IsQPXType = ST->hasQPX() &&
+ (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
+
+ // If we can use the permutation-based load sequence, then this is also
+ // relatively cheap (not counting loop-invariant instructions): one load plus
+ // one permute (the last load in a series has extra cost, but we're
+ // neglecting that here). Note that on the P7, we should do unaligned loads
+ // for Altivec types using the VSX instructions, but that's more expensive
+ // than using the permutation-based load sequence. On the P8, that's no
+ // longer true.
+ if (Opcode == Instruction::Load &&
+ ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
+ Alignment >= LT.second.getScalarType().getStoreSize())
+ return Cost + LT.first; // Add the cost of the permutations.
+
+ // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
+ // P7, unaligned vector loads are more expensive than the permutation-based
+ // load sequence, so that might be used instead, but regardless, the net cost
+ // is about the same (not counting loop-invariant instructions).
+ if (IsVSXType || (ST->hasVSX() && IsAltivecType))
+ return Cost;
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
- unsigned SrcBytes = LT.second.getStoreSize();
- if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
- Cost += LT.first*(SrcBytes/Alignment-1);
-
- // For a vector type, there is also scalarization overhead (only for
- // stores, loads are expanded using the vector-load + permutation sequence,
- // which is much less expensive).
- if (Src->isVectorTy() && Opcode == Instruction::Store)
- for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
- }
+
+ // Add the cost of each scalar load or store.
+ Cost += LT.first*(SrcBytes/Alignment-1);
+
+ // For a vector type, there is also scalarization overhead (only for
+ // stores, loads are expanded using the vector-load + permutation sequence,
+ // which is much less expensive).
+ if (Src->isVectorTy() && Opcode == Instruction::Store)
+ for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+
+ return Cost;
+}
+
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ // Legalize the type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+ // Firstly, the cost of load/store operation.
+ int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+ // PPC, for both Altivec/VSX and QPX, supports cheap arbitrary permutations
+ // (at least in the sense that there need only be one non-loop-invariant
+ // instruction). For each result vector, we need one shuffle per incoming
+ // vector (except that the first shuffle can take two incoming vectors
+ // because it does not need to take itself).
+ Cost += Factor*(LT.first-1);
return Cost;
}
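
The rewritten getMemoryOpCost charges LT.first * (SrcBytes/Alignment - 1) extra accesses for a misaligned type that gets none of the Altivec/VSX/QPX special handling, and getInterleavedMemoryOpCost adds Factor * (LT.first - 1) shuffles on top of the plain memory-op cost. A small worked example of the two formulas, with plain integers standing in for the TTI interface and the numbers assuming the stated legalization:

    #include <cassert>

    // Misaligned access: one extra operation per Alignment-sized piece
    // beyond the first, for each legalized register.
    static int unalignedExtra(int NumRegs, unsigned SrcBytes,
                              unsigned Alignment) {
      if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
        return 0;
      return NumRegs * static_cast<int>(SrcBytes / Alignment - 1);
    }

    // Interleaved access: the patch charges Factor * (NumRegs - 1) shuffles
    // on top of the plain memory-op cost.
    static int interleavedShuffles(int Factor, int NumRegs) {
      return Factor * (NumRegs - 1);
    }

    int main() {
      // 16-byte vector, 4-byte alignment, one legal register: 3 extra accesses.
      assert(unalignedExtra(1, 16, 4) == 3);
      // Factor-4 interleaving of a type legalized to 2 registers: 4 shuffles.
      assert(interleavedShuffles(4, 2) == 4);
      return 0;
    }
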
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 368bef9..04c1b02 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -37,7 +37,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
const PPCTargetLowering *getTLI() const { return TLI; }
public:
- explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
+ explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -52,12 +52,11 @@ public:
/// @{
using BaseT::getIntImmCost;
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -68,22 +67,27 @@ public:
/// @{
bool enableAggressiveInterleaving(bool LoopHasReductions);
+ bool enableInterleavedAccessVectorization();
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 5e3ae2a..782583c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -77,6 +77,14 @@ namespace {
return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
}
+ bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+ }
+
+ bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
+ }
+
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -100,7 +108,9 @@ protected:
IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVRReg(SrcMO.getReg(), MRI)) &&
+ IsVRReg(SrcMO.getReg(), MRI) ||
+ IsVSSReg(SrcMO.getReg(), MRI) ||
+ IsVSFReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
@@ -123,6 +133,8 @@ protected:
IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
&PPC::VSLRCRegClass;
assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVSFReg(DstMO.getReg(), MRI) ||
+ IsVSSReg(DstMO.getReg(), MRI) ||
IsVRReg(DstMO.getReg(), MRI)) &&
"Unknown destination for a VSX copy");
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 46b8d13..6b19a2f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -103,10 +103,10 @@ protected:
VNInfo *AddendValNo =
LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
- if (!AddendValNo) {
- // This can be null if the register is undef.
+
+ // This can be null if the register is undef.
+ if (!AddendValNo)
continue;
- }
MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
@@ -186,18 +186,17 @@ protected:
if (!KilledProdOp)
continue;
- // If the addend copy is used only by this MI, then the addend source
- // register is likely not live here. This could be fixed (based on the
- // legality checks above, the live range for the addend source register
- // could be extended), but it seems likely that such a trivial copy can
- // be coalesced away later, and thus is not worth the effort.
- if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+ // If the addend copy is used only by this MI, then the addend source
+ // register is likely not live here. This could be fixed (based on the
+ // legality checks above, the live range for the addend source register
+ // could be extended), but it seems likely that such a trivial copy can
+ // be coalesced away later, and thus is not worth the effort.
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
!LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
continue;
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
- unsigned AddReg = AddendMI->getOperand(1).getReg();
unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
@@ -221,6 +220,14 @@ protected:
if (OldFMAReg == KilledProdReg)
continue;
+ // If there isn't a class that fits, we can't perform the transform.
+ // This is needed for correctness with a mixture of VSX and Altivec
+ // instructions to make sure that a low VSX register is not assigned to
+ // the Altivec instruction.
+ if (!MRI.constrainRegClass(KilledProdReg,
+ MRI.getRegClass(OldFMAReg)))
+ continue;
+
assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
"Addend copy not tied to old FMA output!");
@@ -228,7 +235,7 @@ protected:
MI->getOperand(0).setReg(KilledProdReg);
MI->getOperand(1).setReg(KilledProdReg);
- MI->getOperand(3).setReg(AddReg);
+ MI->getOperand(3).setReg(AddendSrcReg);
MI->getOperand(2).setReg(OtherProdReg);
MI->getOperand(0).setSubReg(KilledProdSubReg);
@@ -263,8 +270,7 @@ protected:
if (UseMI == AddendMI)
continue;
- UseMO.setReg(KilledProdReg);
- UseMO.setSubReg(KilledProdSubReg);
+ UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
}
// Extend the live intervals of the killed product operand to hold the
@@ -286,6 +292,20 @@ protected:
}
DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+ // Extend the live interval of the addend source (it might end at the
+ // copy to be removed, or somewhere in between there and here). This
+ // is necessary only if it is a physical register.
+ if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
+ for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
+ ++Units) {
+ unsigned Unit = *Units;
+
+ LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
+ AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
+ FMAIdx.getRegSlot());
+ DEBUG(dbgs() << " extended: " << AddendSrcRange << '\n');
+ }
+
FMAInt.removeValNo(FMAValNo);
DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
@@ -347,7 +367,6 @@ INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
char PPCVSXFMAMutate::ID = 0;
-FunctionPass*
-llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
-
-
+FunctionPass *llvm::createPPCVSXFMAMutatePass() {
+ return new PPCVSXFMAMutate();
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index d7132d5..27c540f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -94,7 +94,7 @@ enum SHValues {
SH_NOSWAP_ST,
SH_SPLAT,
SH_XXPERMDI,
- SH_COPYSCALAR
+ SH_COPYWIDEN
};
struct PPCVSXSwapRemoval : public MachineFunctionPass {
@@ -149,6 +149,11 @@ private:
// handling. Return true iff any changes are made.
bool removeSwaps();
+ // Insert a swap instruction from SrcReg to DstReg at the given
+ // InsertPoint.
+ void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg);
+
// Update instructions requiring special handling.
void handleSpecialSwappables(int EntryIdx);
@@ -159,9 +164,7 @@ private:
bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
if (TargetRegisterInfo::isVirtualRegister(Reg))
return RC->hasSubClassEq(MRI->getRegClass(Reg));
- if (RC->contains(Reg))
- return true;
- return false;
+ return RC->contains(Reg);
}
// Return true iff the given register is a full vector register.
@@ -215,7 +218,7 @@ public:
void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
MF = &MFParm;
MRI = &MF->getRegInfo();
- TII = static_cast<const PPCInstrInfo*>(MF->getSubtarget().getInstrInfo());
+ TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
// An initial vector size of 256 appears to work well in practice.
// Small/medium functions with vector content tend not to incur a
@@ -343,6 +346,15 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
SwapVector[VecIdx].IsLoad = 1;
SwapVector[VecIdx].IsSwap = 1;
break;
+ case PPC::LXSDX:
+ case PPC::LXSSPX:
+ // A load of a floating-point value into the high-order half of
+ // a vector register is safe, provided that we introduce a swap
+ // following the load, which will be done by the SUBREG_TO_REG
+ // support. So just mark these as safe.
+ SwapVector[VecIdx].IsLoad = 1;
+ SwapVector[VecIdx].IsSwappable = 1;
+ break;
case PPC::STVX:
// Non-permuting stores are currently unsafe. We can use special
// handling for this in the future. By not marking these as
@@ -385,7 +397,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
else if (isVecReg(MI.getOperand(0).getReg()) &&
isScalarVecReg(MI.getOperand(2).getReg())) {
SwapVector[VecIdx].IsSwappable = 1;
- SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
+ SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
}
break;
}
@@ -420,7 +432,14 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
case PPC::STVEHX:
case PPC::STVEWX:
case PPC::STVXL:
+ // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
+ // by adding special handling for narrowing copies as well as
+ // widening ones. However, I've experimented with this, and in
+ // practice we currently do not appear to use STXSDX fed by
+ // a narrowing copy from a full vector register. Since I can't
+ // generate any useful test cases, I've left this alone for now.
case PPC::STXSDX:
+ case PPC::STXSSPX:
case PPC::VCIPHER:
case PPC::VCIPHERLAST:
case PPC::VMRGHB:
@@ -543,7 +562,8 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
}
if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
- SwapVector[VecIdx].MentionsPhysVR = 1;
+ if (!isScalarVecReg(CopySrcReg))
+ SwapVector[VecIdx].MentionsPhysVR = 1;
return CopySrcReg;
}
@@ -629,8 +649,8 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
SwapVector[Repr].WebRejected = 1;
DEBUG(dbgs() <<
- format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
- Repr));
+ format("Web %d rejected for physreg, partial reg, or not "
+ "swap[pable]\n", Repr));
DEBUG(dbgs() << " in " << EntryIdx << ": ");
DEBUG(SwapVector[EntryIdx].VSEMI->dump());
DEBUG(dbgs() << "\n");
@@ -743,6 +763,21 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
}
}
+// Create an xxswapd instruction and insert it prior to the given point.
+// MI is used to determine basic block and debug loc information.
+// FIXME: When inserting a swap, we should check whether SrcReg is
+// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2. If so, we
+// should instead generate a copy from Reg to DstReg.
+void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPoint,
+ unsigned DstReg, unsigned SrcReg) {
+ BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+ TII->get(PPC::XXPERMDI), DstReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .addImm(2);
+}
+
// The identified swap entry requires special handling to allow its
// containing computation to be optimized. Perform that handling
// here.
@@ -752,8 +787,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
switch (SwapVector[EntryIdx].SpecialHandling) {
default:
- assert(false && "Unexpected special handling type");
- break;
+ llvm_unreachable("Unexpected special handling type");
// For splats based on an index into a vector, add N/2 modulo N
// to the index, where N is the number of vector elements.
@@ -766,7 +800,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
switch (MI->getOpcode()) {
default:
- assert(false && "Unexpected splat opcode");
+ llvm_unreachable("Unexpected splat opcode");
case PPC::VSPLTB: NElts = 16; break;
case PPC::VSPLTH: NElts = 8; break;
case PPC::VSPLTW: NElts = 4; break;
@@ -811,7 +845,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
// For a copy from a scalar floating-point register to a vector
// register, removing swaps will leave the copied value in the
// wrong lane. Insert a swap following the copy to fix this.
- case SHValues::SH_COPYSCALAR: {
+ case SHValues::SH_COPYWIDEN: {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
@@ -825,14 +859,13 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
DEBUG(dbgs() << " Into: ");
DEBUG(MI->dump());
- MachineBasicBlock::iterator InsertPoint = MI->getNextNode();
+ auto InsertPoint = ++MachineBasicBlock::iterator(MI);
// Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
// is copying to a VRRC, we need to be careful to avoid a register
// assignment problem. In this case we must copy from VRRC to VSRC
// prior to the swap, and from VSRC to VRRC following the swap.
// Coalescing will usually remove all this mess.
-
if (DstRC == &PPC::VRRCRegClass) {
unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
@@ -840,29 +873,19 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
TII->get(PPC::COPY), VSRCTmp1)
.addReg(NewVReg);
- DEBUG(MI->getNextNode()->dump());
+ DEBUG(std::prev(InsertPoint)->dump());
- BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
- TII->get(PPC::XXPERMDI), VSRCTmp2)
- .addReg(VSRCTmp1)
- .addReg(VSRCTmp1)
- .addImm(2);
- DEBUG(MI->getNextNode()->getNextNode()->dump());
+ insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
+ DEBUG(std::prev(InsertPoint)->dump());
BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
TII->get(PPC::COPY), DstReg)
.addReg(VSRCTmp2);
- DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
+ DEBUG(std::prev(InsertPoint)->dump());
} else {
-
- BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
- TII->get(PPC::XXPERMDI), DstReg)
- .addReg(NewVReg)
- .addReg(NewVReg)
- .addImm(2);
-
- DEBUG(MI->getNextNode()->dump());
+ insertSwap(MI, InsertPoint, DstReg, NewVReg);
+ DEBUG(std::prev(InsertPoint)->dump());
}
break;
}
@@ -947,8 +970,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
case SH_XXPERMDI:
DEBUG(dbgs() << "special:xxpermdi ");
break;
- case SH_COPYSCALAR:
- DEBUG(dbgs() << "special:copyscalar ");
+ case SH_COPYWIDEN:
+ DEBUG(dbgs() << "special:copywiden ");
break;
}
}
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 1c4e486..a552747 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -34,7 +35,6 @@ namespace {
class SparcOperand;
class SparcAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
MCAsmParser &Parser;
/// @name Auto-generated Match Functions
@@ -69,6 +69,10 @@ class SparcAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);
+ // Helper function for dealing with %lo / %hi in PIC mode.
+ const SparcMCExpr *adjustPICRelocation(SparcMCExpr::VariantKind VK,
+ const MCExpr *subExpr);
+
// returns true if Tok is matched to a register and returns register in RegNo.
bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
unsigned &RegKind);
@@ -77,24 +81,24 @@ class SparcAsmParser : public MCTargetAsmParser {
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool is64Bit() const {
- return STI.getTargetTriple().getArch() == Triple::sparcv9;
+ return getSTI().getTargetTriple().getArch() == Triple::sparcv9;
}
void expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
public:
- SparcAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ SparcAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(Options, sti), Parser(parser) {
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
};
- static unsigned IntRegs[32] = {
+ static const MCPhysReg IntRegs[32] = {
Sparc::G0, Sparc::G1, Sparc::G2, Sparc::G3,
Sparc::G4, Sparc::G5, Sparc::G6, Sparc::G7,
Sparc::O0, Sparc::O1, Sparc::O2, Sparc::O3,
@@ -104,7 +108,7 @@ public:
Sparc::I0, Sparc::I1, Sparc::I2, Sparc::I3,
Sparc::I4, Sparc::I5, Sparc::I6, Sparc::I7 };
- static unsigned FloatRegs[32] = {
+ static const MCPhysReg FloatRegs[32] = {
Sparc::F0, Sparc::F1, Sparc::F2, Sparc::F3,
Sparc::F4, Sparc::F5, Sparc::F6, Sparc::F7,
Sparc::F8, Sparc::F9, Sparc::F10, Sparc::F11,
@@ -114,7 +118,7 @@ public:
Sparc::F24, Sparc::F25, Sparc::F26, Sparc::F27,
Sparc::F28, Sparc::F29, Sparc::F30, Sparc::F31 };
- static unsigned DoubleRegs[32] = {
+ static const MCPhysReg DoubleRegs[32] = {
Sparc::D0, Sparc::D1, Sparc::D2, Sparc::D3,
Sparc::D4, Sparc::D5, Sparc::D6, Sparc::D7,
    Sparc::D8,  Sparc::D9,  Sparc::D10, Sparc::D11,
@@ -124,13 +128,13 @@ public:
Sparc::D24, Sparc::D25, Sparc::D26, Sparc::D27,
Sparc::D28, Sparc::D29, Sparc::D30, Sparc::D31 };
- static unsigned QuadFPRegs[32] = {
+ static const MCPhysReg QuadFPRegs[32] = {
Sparc::Q0, Sparc::Q1, Sparc::Q2, Sparc::Q3,
Sparc::Q4, Sparc::Q5, Sparc::Q6, Sparc::Q7,
Sparc::Q8, Sparc::Q9, Sparc::Q10, Sparc::Q11,
Sparc::Q12, Sparc::Q13, Sparc::Q14, Sparc::Q15 };
- static unsigned ASRRegs[32] = {
+ static const MCPhysReg ASRRegs[32] = {
SP::Y, SP::ASR1, SP::ASR2, SP::ASR3,
SP::ASR4, SP::ASR5, SP::ASR6, SP::ASR7,
SP::ASR8, SP::ASR9, SP::ASR10, SP::ASR11,
@@ -140,6 +144,12 @@ public:
SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+ static const MCPhysReg IntPairRegs[] = {
+ Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7,
+ Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7,
+ Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7,
+ Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7};
+
/// SparcOperand - Instances of this class represent a parsed Sparc machine
/// instruction.
class SparcOperand : public MCParsedAsmOperand {
@@ -147,6 +157,7 @@ public:
enum RegisterKind {
rk_None,
rk_IntReg,
+ rk_IntPairReg,
rk_FloatReg,
rk_DoubleReg,
rk_QuadReg,
@@ -200,6 +211,10 @@ public:
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
+ bool isIntReg() const {
+ return (Kind == k_Register && Reg.Kind == rk_IntReg);
+ }
+
bool isFloatReg() const {
return (Kind == k_Register && Reg.Kind == rk_FloatReg);
}
@@ -330,6 +345,25 @@ public:
return Op;
}
+ static bool MorphToIntPairReg(SparcOperand &Op) {
+ unsigned Reg = Op.getReg();
+ assert(Op.Reg.Kind == rk_IntReg);
+ unsigned regIdx = 32;
+ if (Reg >= Sparc::G0 && Reg <= Sparc::G7)
+ regIdx = Reg - Sparc::G0;
+ else if (Reg >= Sparc::O0 && Reg <= Sparc::O7)
+ regIdx = Reg - Sparc::O0 + 8;
+ else if (Reg >= Sparc::L0 && Reg <= Sparc::L7)
+ regIdx = Reg - Sparc::L0 + 16;
+ else if (Reg >= Sparc::I0 && Reg <= Sparc::I7)
+ regIdx = Reg - Sparc::I0 + 24;
+ if (regIdx % 2 || regIdx > 31)
+ return false;
+ Op.Reg.RegNum = IntPairRegs[regIdx / 2];
+ Op.Reg.Kind = rk_IntPairReg;
+ return true;
+ }
+
static bool MorphToDoubleReg(SparcOperand &Op) {
unsigned Reg = Op.getReg();
assert(Op.Reg.Kind == rk_FloatReg);
@@ -407,7 +441,22 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
// the imm operand can be either an expression or an immediate.
bool IsImm = Inst.getOperand(1).isImm();
- uint64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
+ int64_t RawImmValue = IsImm ? MCValOp.getImm() : 0;
+
+ // Allow either a signed or unsigned 32-bit immediate.
+ if (RawImmValue < -2147483648LL || RawImmValue > 4294967295LL) {
+ Error(IDLoc, "set: argument must be between -2147483648 and 4294967295");
+ return;
+ }
+
+ // If the value was expressed as a large unsigned number, that's ok.
+ // We want to see if it "looks like" a small signed number.
+ int32_t ImmValue = RawImmValue;
+ // For 'set' you can't use 'or' with a negative operand on V9 because
+ // that would splat the sign bit across the upper half of the destination
+ // register, whereas 'set' is defined to zero the high 32 bits.
+ bool IsEffectivelyImm13 =
+ IsImm && ((is64Bit() ? 0 : -4096) <= ImmValue && ImmValue < 4096);
const MCExpr *ValExpr;
if (IsImm)
ValExpr = MCConstantExpr::create(ImmValue, getContext());
@@ -416,10 +465,12 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
MCOperand PrevReg = MCOperand::createReg(Sparc::G0);
- if (!IsImm || (ImmValue & ~0x1fff)) {
+ // If not just a signed imm13 value, then either we use a 'sethi' with a
+ // following 'or', or a 'sethi' by itself if there are no more 1 bits.
+ // In either case, start with the 'sethi'.
+ if (!IsEffectivelyImm13) {
MCInst TmpInst;
- const MCExpr *Expr =
- SparcMCExpr::create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext());
+ const MCExpr *Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_HI, ValExpr);
TmpInst.setLoc(IDLoc);
TmpInst.setOpcode(SP::SETHIi);
TmpInst.addOperand(MCRegOp);
@@ -428,10 +479,23 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
PrevReg = MCRegOp;
}
- if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) {
+ // The low bits require touching in 3 cases:
+ // * A non-immediate value will always require both instructions.
+ // * An effectively imm13 value needs only an 'or' instruction.
+ // * Otherwise, an immediate that is not effectively imm13 requires the
+ // 'or' only if bits remain after clearing the 22 bits that 'sethi' set.
+ // If the low bits are known zeros, there's nothing to do.
+ // In the second case, and only in that case, must we NOT clear
+ // bits of the immediate value via the %lo() assembler function.
+ // Note also, the 'or' instruction doesn't mind a large value in the case
+ // where the operand to 'set' was 0xFFFFFzzz - it does exactly what you mean.
+ if (!IsImm || IsEffectivelyImm13 || (ImmValue & 0x3ff)) {
MCInst TmpInst;
- const MCExpr *Expr =
- SparcMCExpr::create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext());
+ const MCExpr *Expr;
+ if (IsEffectivelyImm13)
+ Expr = ValExpr;
+ else
+ Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_LO, ValExpr);
TmpInst.setLoc(IDLoc);
TmpInst.setOpcode(SP::ORri);
TmpInst.addOperand(MCRegOp);
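As a side note on the rules spelled out in the comments above, here is a minimal standalone sketch of how a 32-bit 'set' immediate is classified; the helper name and return strings are illustrative only and are not part of the patch:

    // Minimal sketch, assuming the imm13/sethi/or rules described above.
    #include <cstdint>
    #include <string>

    std::string setExpansionFor(int32_t Imm, bool Is64Bit) {
      // A signed imm13 value fits a single 'or' (and must not be %lo-masked).
      bool IsEffectivelyImm13 = (Is64Bit ? 0 : -4096) <= Imm && Imm < 4096;
      if (IsEffectivelyImm13)
        return "or";
      // Otherwise start with 'sethi %hi(imm)'; add 'or ..., %lo(imm)' only if
      // any bits remain below the 22 bits that sethi sets.
      return (Imm & 0x3ff) ? "sethi+or" : "sethi";
    }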
@@ -463,7 +527,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
for (const MCInst &I : Instructions) {
- Out.EmitInstruction(I, STI);
+ Out.EmitInstruction(I, getSTI());
}
return false;
}
@@ -742,6 +806,9 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
case Sparc::PSR:
Op = SparcOperand::CreateToken("%psr", S);
break;
+ case Sparc::FSR:
+ Op = SparcOperand::CreateToken("%fsr", S);
+ break;
case Sparc::WIM:
Op = SparcOperand::CreateToken("%wim", S);
break;
@@ -766,6 +833,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
case AsmToken::Minus:
case AsmToken::Integer:
case AsmToken::LParen:
+ case AsmToken::Dot:
if (!getParser().parseExpression(EVal, E))
Op = SparcOperand::CreateImm(EVal, S, E);
break;
@@ -848,6 +916,13 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
return true;
}
+ // %fprs is an alias of %asr6.
+ if (name.equals("fprs")) {
+ RegNo = ASRRegs[6];
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
if (name.equals("icc")) {
RegNo = Sparc::ICC;
RegKind = SparcOperand::rk_Special;
@@ -860,6 +935,12 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
return true;
}
+ if (name.equals("fsr")) {
+ RegNo = Sparc::FSR;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+
if (name.equals("wim")) {
RegNo = Sparc::WIM;
RegKind = SparcOperand::rk_Special;
@@ -943,6 +1024,82 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
RegKind = SparcOperand::rk_IntReg;
return true;
}
+
+ if (name.equals("tpc")) {
+ RegNo = Sparc::TPC;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tnpc")) {
+ RegNo = Sparc::TNPC;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tstate")) {
+ RegNo = Sparc::TSTATE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tt")) {
+ RegNo = Sparc::TT;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tick")) {
+ RegNo = Sparc::TICK;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tba")) {
+ RegNo = Sparc::TBA;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("pstate")) {
+ RegNo = Sparc::PSTATE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("tl")) {
+ RegNo = Sparc::TL;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("pil")) {
+ RegNo = Sparc::PIL;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("cwp")) {
+ RegNo = Sparc::CWP;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("cansave")) {
+ RegNo = Sparc::CANSAVE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("canrestore")) {
+ RegNo = Sparc::CANRESTORE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("cleanwin")) {
+ RegNo = Sparc::CLEANWIN;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("otherwin")) {
+ RegNo = Sparc::OTHERWIN;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
+ if (name.equals("wstate")) {
+ RegNo = Sparc::WSTATE;
+ RegKind = SparcOperand::rk_Special;
+ return true;
+ }
}
return false;
}
@@ -975,6 +1132,32 @@ static bool hasGOTReference(const MCExpr *Expr) {
return false;
}
+const SparcMCExpr *
+SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
+ const MCExpr *subExpr)
+{
+ // When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
+ // If the expression contains _GLOBAL_OFFSET_TABLE_, it is actually a
+ // %pc10 or %pc22 relocation. Otherwise, it is interpreted as a %got10
+ // or %got22 relocation.
+
+ if (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) {
+ switch(VK) {
+ default: break;
+ case SparcMCExpr::VK_Sparc_LO:
+ VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC10
+ : SparcMCExpr::VK_Sparc_GOT10);
+ break;
+ case SparcMCExpr::VK_Sparc_HI:
+ VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC22
+ : SparcMCExpr::VK_Sparc_GOT22);
+ break;
+ }
+ }
+
+ return SparcMCExpr::create(VK, subExpr, getContext());
+}
+
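The mapping implemented by adjustPICRelocation can be restated as a small sketch; the enum and helper names below are illustrative, not part of the patch:

    // Hedged restatement of the %hi/%lo relocation choice.
    enum EffectiveReloc { Hi22, Lo10, Pc22, Pc10, Got22, Got10 };

    EffectiveReloc effectiveReloc(bool IsPIC, bool RefersToGOT, bool IsHi) {
      if (!IsPIC)
        return IsHi ? Hi22 : Lo10;   // non-PIC: %hi/%lo keep their plain meaning
      if (RefersToGOT)
        return IsHi ? Pc22 : Pc10;   // PIC + _GLOBAL_OFFSET_TABLE_: pc-relative
      return IsHi ? Got22 : Got10;   // PIC otherwise: GOT-relative
    }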
bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
SMLoc &EndLoc)
{
@@ -998,30 +1181,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
if (Parser.parseParenExpression(subExpr, EndLoc))
return false;
- bool isPIC = getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
-
- // Ugly: if a sparc assembly expression says "%hi(...)" but the
- // expression within contains _GLOBAL_OFFSET_TABLE_, it REALLY means
- // %pc22. Same with %lo -> %pc10. Worse, if it doesn't contain that,
- // the meaning depends on whether the assembler was invoked with
- // -KPIC or not: if so, it really means %got22/%got10; if not, it
- // actually means what it said! Sigh, historical mistakes...
-
- switch(VK) {
- default: break;
- case SparcMCExpr::VK_Sparc_LO:
- VK = (hasGOTReference(subExpr)
- ? SparcMCExpr::VK_Sparc_PC10
- : (isPIC ? SparcMCExpr::VK_Sparc_GOT10 : VK));
- break;
- case SparcMCExpr::VK_Sparc_HI:
- VK = (hasGOTReference(subExpr)
- ? SparcMCExpr::VK_Sparc_PC22
- : (isPIC ? SparcMCExpr::VK_Sparc_GOT22 : VK));
- break;
- }
-
- EVal = SparcMCExpr::create(VK, subExpr, getContext());
+ EVal = adjustPICRelocation(VK, subExpr);
return true;
}
@@ -1051,5 +1211,9 @@ unsigned SparcAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
break;
}
}
+ if (Op.isIntReg() && Kind == MCK_IntPair) {
+ if (SparcOperand::MorphToIntPairReg(Op))
+ return MCTargetAsmParser::Match_Success;
+ }
return Match_InvalidOperand;
}
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 38bff44..c689b7f 100644
--- a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -122,6 +122,8 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
continue;
}
+ // TODO: If we ever want to support v7, this needs to be extended
+ // to cover all floating point operations.
if (!Subtarget->isV9() &&
(MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD
|| MI->getOpcode() == SP::FCMPQ)) {
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e56b9e..51751ec 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -117,6 +117,19 @@ static const unsigned ASRRegDecoderTable[] = {
SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};
+static const unsigned PRRegDecoderTable[] = {
+ SP::TPC, SP::TNPC, SP::TSTATE, SP::TT, SP::TICK, SP::TBA, SP::PSTATE,
+ SP::TL, SP::PIL, SP::CWP, SP::CANSAVE, SP::CANRESTORE, SP::CLEANWIN,
+ SP::OTHERWIN, SP::WSTATE
+};
+
+static const uint16_t IntPairDecoderTable[] = {
+ SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7,
+ SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7,
+ SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7,
+ SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7,
+};
+
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -196,9 +209,34 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= array_lengthof(PRRegDecoderTable))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ if ((RegNo & 1))
+ S = MCDisassembler::SoftFail;
+
+ unsigned RegisterPair = IntPairDecoderTable[RegNo/2];
+ Inst.addOperand(MCOperand::createReg(RegisterPair));
+ return S;
+}
static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address,
@@ -207,6 +245,8 @@ static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn,
@@ -326,6 +366,12 @@ static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
DecodeIntRegsRegisterClass);
}
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+ const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, true,
+ DecodeIntPairRegisterClass);
+}
+
static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, true,
@@ -350,6 +396,12 @@ static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
DecodeIntRegsRegisterClass);
}
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+ uint64_t Address, const void *Decoder) {
+ return DecodeMem(Inst, insn, Address, Decoder, false,
+ DecodeIntPairRegisterClass);
+}
+
static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
const void *Decoder) {
return DecodeMem(Inst, insn, Address, Decoder, false,
diff --git a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 0b01b88..6f06d1d 100644
--- a/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/contrib/llvm/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -15,12 +15,9 @@
#define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class SparcInstPrinter : public MCInstPrinter {
public:
SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 12386f1..ad44122 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -21,6 +21,7 @@ class Triple;
class SparcELFMCAsmInfo : public MCAsmInfoELF {
void anchor() override;
+
public:
explicit SparcELFMCAsmInfo(const Triple &TheTriple);
const MCExpr*
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index d08ad86..13f0819 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -90,8 +90,8 @@ public:
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
- MCSection *findAssociatedSection() const override {
- return getSubExpr()->findAssociatedSection();
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
}
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index c5f046b..e3b0f52 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -267,11 +267,11 @@ void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo());
return;
}
- MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
do {
MCInst TmpInst;
- LowerSparcMachineInstrToMCInst(I, TmpInst, *this);
+ LowerSparcMachineInstrToMCInst(&*I, TmpInst, *this);
EmitToStreamer(*OutStreamer, TmpInst);
} while ((++I != E) && I->isInsideBundle()); // Delay slot check.
}
@@ -296,7 +296,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() {
void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const MachineOperand &MO = MI->getOperand (opNum);
SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags();
@@ -373,7 +373,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
O << MO.getSymbolName();
break;
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
+ O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
<< MO.getIndex();
break;
default:
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
index dfaaabf..0aa29d1 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -21,7 +21,11 @@ def CC_Sparc32 : CallingConv<[
// i32 f32 arguments get passed in integer registers if there is space.
CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
// f64 arguments are split and passed through registers or through stack.
- CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+ CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>,
+ // As are v2i32 arguments (this would be the default behavior for
+ // v2i32 if it wasn't allocated to the IntPair register-class)
+ CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>,
+
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
@@ -30,7 +34,8 @@ def CC_Sparc32 : CallingConv<[
def RetCC_Sparc32 : CallingConv<[
CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1]>>
+ CCIfType<[f64], CCAssignToReg<[D0, D1]>>,
+ CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">>
]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index c0279da..39b5e80 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -44,7 +44,7 @@ void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
unsigned ADDrr,
unsigned ADDri) const {
- DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
+ DebugLoc dl;
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -90,8 +90,23 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
MachineFrameInfo *MFI = MF.getFrameInfo();
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SparcRegisterInfo &RegInfo =
+ *static_cast<const SparcRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
+ bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF);
+
+ // FIXME: unfortunately, returning false from canRealignStack
+ // actually just causes needsStackRealignment to return false,
+ // rather than reporting an error, as would be sensible. This is
+ // poor, but fixing that bogosity is going to be a large project.
+ // For now, just see if it's lied, and report an error here.
+ if (!NeedsStackRealignment && MFI->getMaxAlignment() > getStackAlignment())
+ report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required "
+ "stack re-alignment, but LLVM couldn't handle it "
+ "(probably because it has a dynamic alloca).");
// Get the number of bytes to allocate from the FrameInfo
int NumBytes = (int) MFI->getStackSize();
@@ -104,12 +119,43 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
SAVEri = SP::ADDri;
SAVErr = SP::ADDrr;
}
- NumBytes = -MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
- emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
+
+ // The SPARC ABI is a bit odd in that it requires a reserved 92-byte
+ // (128 in v9) area in the user's stack, starting at %sp. Thus, the
+ // first part of the stack that can actually be used is located at
+ // %sp + 92.
+ //
+ // We therefore need to add that offset to the total stack size after
+ // all the stack objects are placed by PrologEpilogInserter's
+ // calculateFrameObjectOffsets. However, since the stack needs to be
+ // aligned *after* the extra size is added, we need to disable
+ // calculateFrameObjectOffsets's built-in stack alignment, by having
+ // targetHandlesStackFrameRounding return true.
+
+
+ // Add the extra call frame stack size, if needed. (This is the same
+ // code as in PrologEpilogInserter, but also gets disabled by
+ // targetHandlesStackFrameRounding)
+ if (MFI->adjustsStack() && hasReservedCallFrame(MF))
+ NumBytes += MFI->getMaxCallFrameSize();
+
+ // Adds the SPARC subtarget-specific spill area to the stack
+ // size. Also ensures target-required alignment.
+ NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
+
+ // Finally, ensure that the size is sufficiently aligned for the
+ // data on the stack.
+ if (MFI->getMaxAlignment() > 0) {
+ NumBytes = RoundUpToAlignment(NumBytes, MFI->getMaxAlignment());
+ }
+
+ // Update stack size with corrected value.
+ MFI->setStackSize(NumBytes);
+
+ emitSPAdjustment(MF, MBB, MBBI, -NumBytes, SAVErr, SAVEri);
MachineModuleInfo &MMI = MF.getMMI();
- const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- unsigned regFP = MRI->getDwarfRegNum(SP::I6, true);
+ unsigned regFP = RegInfo.getDwarfRegNum(SP::I6, true);
// Emit ".cfi_def_cfa_register 30".
unsigned CFIIndex =
@@ -122,13 +168,19 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true);
- unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true);
+ unsigned regInRA = RegInfo.getDwarfRegNum(SP::I7, true);
+ unsigned regOutRA = RegInfo.getDwarfRegNum(SP::O7, true);
// Emit ".cfi_register 15, 31".
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+
+ if (NeedsStackRealignment) {
+ // andn %o6, MaxAlign-1, %o6
+ int MaxAlign = MFI->getMaxAlignment();
+ BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), SP::O6).addReg(SP::O6).addImm(MaxAlign - 1);
+ }
}
void SparcFrameLowering::
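A rough sketch of the 32-bit frame-size computation the prologue above now performs itself, since targetHandlesStackFrameRounding() disables the generic rounding. The constants and the helper name are assumptions for illustration only; the v9 path reserves 128 bytes and rounds to 16 instead:

    // Sketch only; MaxAlign is assumed to be a power of two.
    unsigned sparc32FrameBytes(unsigned ObjectBytes, unsigned MaxCallFrame,
                               unsigned MaxAlign) {
      unsigned Bytes = ObjectBytes + MaxCallFrame; // call frame folded in up front
      Bytes += 92;                                 // ABI-reserved area starting at %sp
      Bytes = (Bytes + 7) & ~7u;                   // round to the 8-byte ABI alignment
      if (MaxAlign > 0)                            // over-aligned objects re-round the size
        Bytes = (Bytes + MaxAlign - 1) & ~(MaxAlign - 1);
      return Bytes;
    }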
@@ -167,7 +219,6 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes == 0)
return;
- NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
@@ -180,21 +231,69 @@ bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+ RegInfo->needsStackRealignment(MF) ||
+ MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken();
}
+int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SparcRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ // Addressable stack objects are accessed using neg. offsets from
+ // %fp, or positive offsets from %sp.
+ bool UseFP;
+
+ // Sparc uses FP-based references in general, even when "hasFP" is
+ // false. That function is rather a misnomer, because %fp is
+ // actually always available, unless isLeafProc.
+ if (FuncInfo->isLeafProc()) {
+ // In a leaf procedure, all offsets need to be %sp-based,
+ // because we haven't caused %fp to actually point to our frame.
+ UseFP = false;
+ } else if (isFixed) {
+ // Otherwise, argument access should always use %fp.
+ UseFP = true;
+ } else if (RegInfo->needsStackRealignment(MF)) {
+ // If there is dynamic stack realignment, all local object
+ // references need to be via %sp, to take account of the
+ // re-alignment.
+ UseFP = false;
+ } else {
+ // Finally, default to using %fp.
+ UseFP = true;
+ }
+
+ int64_t FrameOffset = MF.getFrameInfo()->getObjectOffset(FI) +
+ Subtarget.getStackPointerBias();
+
+ if (UseFP) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FrameOffset;
+ } else {
+ FrameReg = SP::O6; // %sp
+ return FrameOffset + MF.getFrameInfo()->getStackSize();
+ }
+}
+
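The base-register choice above boils down to the following sketch (the helper name is illustrative); when %sp is chosen, the frame size is additionally added to the object offset, as in the code above:

    // Compact restatement of getFrameIndexReference's decision.
    bool frameIndexUsesFP(bool IsLeafProc, bool IsFixedObject, bool NeedsRealign) {
      if (IsLeafProc)
        return false; // %fp was never set up to point at this frame
      if (IsFixedObject)
        return true;  // incoming arguments live relative to %fp
      if (NeedsRealign)
        return false; // realigned locals must be %sp-relative
      return true;    // default: %fp
    }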
static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
{
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
- if (MRI->isPhysRegUsed(reg))
+ if (!MRI->reg_nodbg_empty(reg))
return false;
for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
- if (MRI->isPhysRegUsed(reg))
+ if (!MRI->reg_nodbg_empty(reg))
return false;
return true;
@@ -206,33 +305,42 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- return !(MFI->hasCalls() // has calls
- || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
- || MRI.isPhysRegUsed(SP::O6) // %SP is used
- || hasFP(MF)); // need %FP
+ return !(MFI->hasCalls() // has calls
+ || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
+ || !MRI.reg_nodbg_empty(SP::O6) // %SP is used
+ || hasFP(MF)); // need %FP
}
void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
-
MachineRegisterInfo &MRI = MF.getRegInfo();
-
// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
- if (!MRI.isPhysRegUsed(reg))
+ if (MRI.reg_nodbg_empty(reg))
continue;
- unsigned mapped_reg = (reg - SP::I0 + SP::O0);
- assert(!MRI.isPhysRegUsed(mapped_reg));
+
+ unsigned mapped_reg = reg - SP::I0 + SP::O0;
+ assert(MRI.reg_nodbg_empty(mapped_reg));
// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);
- // Mark the reg unused.
- MRI.setPhysRegUnused(reg);
+ // Also replace register pair super-registers.
+ if ((reg - SP::I0) % 2 == 0) {
+ unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1;
+ unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1;
+ MRI.replaceRegWith(preg, mapped_preg);
+ }
}
// Rewrite MBB's Live-ins.
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
+ for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) {
+ if (!MBB->isLiveIn(reg))
+ continue;
+ MBB->removeLiveIn(reg);
+ MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
+ }
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
if (!MBB->isLiveIn(reg))
continue;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
index 29fc7b7..cbb4dc0 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -39,6 +39,14 @@ public:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const override { return true; }
+
private:
// Remap input registers to output registers for leaf procedure.
void remapRegsForLeafProc(MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 340b72e..c4c6416 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "SparcTargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Compiler.h"
@@ -62,6 +63,7 @@ public:
private:
SDNode* getGlobalBaseReg();
+ SDNode *SelectInlineAsm(SDNode *N);
};
} // end anonymous namespace
@@ -141,6 +143,181 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
return true;
}
+
+// Re-assemble i64 arguments split up in SelectionDAGBuilder's
+// visitInlineAsm / GetRegistersForValue functions.
+//
+// Note: This function was copied from, and is essentially identical
+// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that
+// such hacking-up is necessary; a rethink of how inline asm operands
+// are handled may be in order to make doing this more sane.
+//
+// TODO: fix inline asm support so I can simply tell it that 'i64'
+// inputs to asm need to be allocated to the IntPair register type,
+// and have that work. Then, delete this function.
+SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N){
+ std::vector<SDValue> AsmNodeOperands;
+ unsigned Flag, Kind;
+ bool Changed = false;
+ unsigned NumOps = N->getNumOperands();
+
+ // Normally, i64 data is bound to two arbitrary GPRs for "%r"
+ // constraint. However, some instructions (e.g. ldd/std) require
+ // (even/even+1) GPRs.
+
+ // So, here, we check for this case, and mutate the inlineasm to use
+ // a single IntPair register instead, which guarantees such even/odd
+ // placement.
+
+ SDLoc dl(N);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
+ : SDValue(nullptr,0);
+
+ SmallVector<bool, 8> OpChanged;
+ // Glue node will be appended late.
+ for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
+ SDValue op = N->getOperand(i);
+ AsmNodeOperands.push_back(op);
+
+ if (i < InlineAsm::Op_FirstOperand)
+ continue;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
+ Flag = C->getZExtValue();
+ Kind = InlineAsm::getKind(Flag);
+ }
+ else
+ continue;
+
+ // Immediate operands to inline asm in the SelectionDAG are modeled with
+ // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
+ // the second is a constant with the value of the immediate. If we get here
+ // and we have a Kind_Imm, skip the next operand, and continue.
+ if (Kind == InlineAsm::Kind_Imm) {
+ SDValue op = N->getOperand(++i);
+ AsmNodeOperands.push_back(op);
+ continue;
+ }
+
+ unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
+ if (NumRegs)
+ OpChanged.push_back(false);
+
+ unsigned DefIdx = 0;
+ bool IsTiedToChangedOp = false;
+ // If it's a use that is tied with a previous def, it has no
+ // reg class constraint.
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
+ IsTiedToChangedOp = OpChanged[DefIdx];
+
+ if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
+ && Kind != InlineAsm::Kind_RegDefEarlyClobber)
+ continue;
+
+ unsigned RC;
+ bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
+ if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID))
+ || NumRegs != 2)
+ continue;
+
+ assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
+ SDValue V0 = N->getOperand(i+1);
+ SDValue V1 = N->getOperand(i+2);
+ unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
+ unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
+ SDValue PairedReg;
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ if (Kind == InlineAsm::Kind_RegDef ||
+ Kind == InlineAsm::Kind_RegDefEarlyClobber) {
+ // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
+ // the original GPRs.
+
+ unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
+ SDValue Chain = SDValue(N,0);
+
+ SDNode *GU = N->getGluedUser();
+ SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32,
+ Chain.getValue(1));
+
+ // Extract values from a GPRPair reg and copy to the original GPR reg.
+ SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32,
+ RegCopy);
+ SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32,
+ RegCopy);
+ SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
+ RegCopy.getValue(1));
+ SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
+
+ // Update the original glue user.
+ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
+ Ops.push_back(T1.getValue(1));
+ CurDAG->UpdateNodeOperands(GU, Ops);
+ }
+ else {
+ // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a
+ // GPRPair and then pass the GPRPair to the inline asm.
+ SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
+
+ // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
+ Chain.getValue(1));
+ SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
+ T0.getValue(1));
+ SDValue Pair = SDValue(
+ CurDAG->getMachineNode(
+ TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32,
+ {
+ CurDAG->getTargetConstant(SP::IntPairRegClassID, dl,
+ MVT::i32),
+ T0,
+ CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32),
+ T1,
+ CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32),
+ }),
+ 0);
+
+ // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
+ // i32 VRs of inline asm with it.
+ unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass);
+ PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32);
+ Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
+
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+ Glue = Chain.getValue(1);
+ }
+
+ Changed = true;
+
+ if(PairedReg.getNode()) {
+ OpChanged[OpChanged.size() -1 ] = true;
+ Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
+ if (IsTiedToChangedOp)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
+ else
+ Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID);
+ // Replace the current flag.
+ AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
+ Flag, dl, MVT::i32);
+ // Add the new register node and skip the original two GPRs.
+ AsmNodeOperands.push_back(PairedReg);
+ // Skip the next two GPRs.
+ i += 2;
+ }
+ }
+
+ if (Glue.getNode())
+ AsmNodeOperands.push_back(Glue);
+ if (!Changed)
+ return nullptr;
+
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
+ CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
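For context, a hypothetical example (names are illustrative) of the kind of 32-bit SPARC source this rewrite exists for: an i64 "r" operand consumed by 'std', which only accepts an even/odd register pair:

    #include <cstdint>

    void store_doubleword(uint64_t v, uint64_t *p) {
      // Without the IntPair rewrite, v could be split across two unrelated
      // GPRs, which 'std' cannot encode.
      __asm__ volatile("std %0, [%1]" : : "r"(v), "r"(p) : "memory");
    }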
SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
@@ -150,6 +327,12 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::INLINEASM: {
+ SDNode *ResNode = SelectInlineAsm(N);
+ if (ResNode)
+ return ResNode;
+ break;
+ }
case SPISD::GLOBAL_BASE_REG:
return getGlobalBaseReg();
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 4879d4e..5e70ffe 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -49,9 +49,9 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
return true;
}
-static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State)
+static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
@@ -77,6 +77,29 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
return true;
}
+static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+ static const MCPhysReg RegList[] = {
+ SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+ };
+
+ // Try to get first reg.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ return false;
+
+ // Try to get second reg.
+ if (unsigned Reg = State.AllocateReg(RegList))
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ else
+ return false;
+
+ return true;
+}
+
// Allocate a full-sized argument for the 64-bit ABI.
static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
@@ -202,12 +225,34 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
RetOps.push_back(SDValue());
// Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(),
- OutVals[i], Flag);
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ if (VA.needsCustom()) {
+ assert(VA.getLocVT() == MVT::v2i32);
+ // Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would
+ // happen by default if this wasn't a legal type)
+
+ SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Arg,
+ DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
+ Arg,
+ DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ VA = RVLocs[++i]; // skip ahead to next loc
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
+ Flag);
+ } else
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
@@ -355,6 +400,7 @@ LowerFormalArguments_32(SDValue Chain,
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
const unsigned StackOffset = 92;
+ bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
unsigned InIdx = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) {
@@ -375,7 +421,8 @@ LowerFormalArguments_32(SDValue Chain,
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
- assert(VA.getLocVT() == MVT::f64);
+ assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
+
unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
@@ -396,9 +443,13 @@ LowerFormalArguments_32(SDValue Chain,
&SP::IntRegsRegClass);
LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
}
+
+ if (IsLittleEndian)
+ std::swap(LoVal, HiVal);
+
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
- WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
@@ -422,7 +473,7 @@ LowerFormalArguments_32(SDValue Chain,
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (VA.needsCustom()) {
- assert(VA.getValVT() == MVT::f64);
+ assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
// If it is double-word aligned, just load.
if (Offset % 8 == 0) {
int FI = MF.getFrameInfo()->CreateFixedObject(8,
@@ -452,9 +503,12 @@ LowerFormalArguments_32(SDValue Chain,
MachinePointerInfo(),
false, false, false, 0);
+ if (IsLittleEndian)
+ std::swap(LoVal, HiVal);
+
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
- WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+ WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
@@ -468,16 +522,12 @@ LowerFormalArguments_32(SDValue Chain,
Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
MachinePointerInfo(),
false, false, false, 0);
+ } else if (VA.getValVT() == MVT::f128) {
+ report_fatal_error("SPARCv8 does not handle f128 in calls; "
+ "pass indirectly");
} else {
- ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
- // Sparc is big endian, so add an offset based on the ObjectVT.
- unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
- FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
- DAG.getConstant(Offset, dl, MVT::i32));
- Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
- MachinePointerInfo(),
- VA.getValVT(), false, false, false,0);
- Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
+ // We shouldn't see any other value types here.
+ llvm_unreachable("Unexpected ValVT encountered in frame lowering.");
}
InVals.push_back(Load);
}
@@ -612,7 +662,7 @@ LowerFormalArguments_64(SDValue Chain,
InVals.push_back(DAG.getLoad(
VA.getValVT(), DL, Chain,
DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
- MachinePointerInfo::getFixedStack(FI), false, false, false, 0));
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0));
}
if (!IsVarArg)
@@ -640,9 +690,9 @@ LowerFormalArguments_64(SDValue Chain,
SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true);
auto PtrVT = getPointerTy(MF.getDataLayout());
- OutChains.push_back(
- DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
- MachinePointerInfo::getFixedStack(FI), false, false, 0));
+ OutChains.push_back(DAG.getStore(
+ Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
}
if (!OutChains.empty())
@@ -788,7 +838,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
if (VA.needsCustom()) {
- assert(VA.getLocVT() == MVT::f64);
+ assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
if (VA.isMemLoc()) {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
@@ -804,49 +854,53 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
}
- SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
- Arg, StackPtr, MachinePointerInfo(),
- false, false, 0);
- // Sparc is big-endian, so the high part comes first.
- SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, false, 0);
- // Increment the pointer to the other half.
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getIntPtrConstant(4, dl));
- // Load the low part.
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, false, 0);
+ if (VA.getLocVT() == MVT::f64) {
+ // Move the float value from the float registers into the
+ // integer registers.
+
+ // TODO: The f64 -> v2i32 conversion is super-inefficient for
+ // constants: it sticks them in the constant pool, then loads
+ // to a fp register, then stores to temp memory, then loads to
+ // integer registers.
+ Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
+ }
+
+ SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ Arg,
+ DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout())));
+ SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ Arg,
+ DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout())));
if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0));
assert(i+1 != e);
CCValAssign &NextVA = ArgLocs[++i];
if (NextVA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1));
} else {
- // Store the low part in stack.
+ // Store the second part in stack.
unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
} else {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
- // Store the high part.
+ // Store the first part.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff,
MachinePointerInfo(),
false, false, 0));
- // Store the low part.
+ // Store the second part.
PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff,
MachinePointerInfo(),
false, false, 0));
}
@@ -990,8 +1044,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
if (!CalleeFn)
return 0;
- assert(CalleeFn->hasStructRetAttr() &&
- "Callee does not have the StructRet attribute.");
+ // It would be nice to check for the sret attribute on CalleeFn here,
+ // but since it is not part of the function type, any check will misfire.
PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
Type *ElementTy = Ty->getElementType();
@@ -1370,15 +1424,60 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
const SparcSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
- auto &DL = *TM.getDataLayout();
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
+
+ // Instructions which use registers as conditionals examine all the
+ // bits (as does the pseudo SELECT_CC expansion). I don't think it
+ // matters much whether it's ZeroOrOneBooleanContent, or
+ // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
+ // former.
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
- if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit()) {
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
+ } else {
+ // On 32-bit SPARC, we also define a register class whose registers
+ // are pairs of 32-bit registers, modeled in LLVM as a 2-vector of i32.
+ addRegisterClass(MVT::v2i32, &SP::IntPairRegClass);
+
+ // ...but almost all operations must be expanded, so set that as
+ // the default.
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+ setOperationAction(Op, MVT::v2i32, Expand);
+ }
+ // Truncating/extending stores/loads are also not supported.
+ for (MVT VT : MVT::integer_vector_valuetypes()) {
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand);
+
+ setTruncStoreAction(VT, MVT::v2i32, Expand);
+ setTruncStoreAction(MVT::v2i32, VT, Expand);
+ }
+ // However, load and store *are* legal.
+ setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
+ setOperationAction(ISD::STORE, MVT::v2i32, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal);
+
+ // And we need to promote i64 loads/stores into vector load/store
+ setOperationAction(ISD::LOAD, MVT::i64, Custom);
+ setOperationAction(ISD::STORE, MVT::i64, Custom);
+
+ // Sadly, this doesn't work:
+ // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+ // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+ }
// Turn FP extload into load/fextend
for (MVT VT : MVT::fp_valuetypes()) {
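A sketch of the register-level model assumed here: an i64 is carried in an even/odd IntPair as a v2i32, with element 0 in the even register; on big-endian SPARC that element is the high word (the patch swaps the halves for little-endian). The struct and helper below are illustrative only:

    #include <cstdint>

    struct IntPairModel {
      uint32_t Even; // e.g. %o0 -- element 0 of the v2i32
      uint32_t Odd;  // e.g. %o1 -- element 1
    };

    uint64_t asI64BigEndian(const IntPairModel &P) {
      return (uint64_t(P.Even) << 32) | P.Odd; // matches the i64 <-> v2i32 bitcast
    }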
@@ -1396,10 +1495,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
- setOperationAction(ISD::GlobalAddress, getPointerTy(DL), Custom);
- setOperationAction(ISD::GlobalTLSAddress, getPointerTy(DL), Custom);
- setOperationAction(ISD::ConstantPool, getPointerTy(DL), Custom);
- setOperationAction(ISD::BlockAddress, getPointerTy(DL), Custom);
+ setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
+ setOperationAction(ISD::ConstantPool, PtrVT, Custom);
+ setOperationAction(ISD::BlockAddress, PtrVT, Custom);
// Sparc doesn't have sext_inreg, replace them with shl/sra
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -1579,9 +1678,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM,
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
- setExceptionPointerRegister(SP::I0);
- setExceptionSelectorRegister(SP::I1);
-
setStackPointerRegisterToSaveRestore(SP::O6);
setOperationAction(ISD::CTPOP, MVT::i32,
@@ -1744,18 +1840,15 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
// set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition.
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
- if (isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->isNullValue() &&
+ if (isNullConstant(RHS) &&
CC == ISD::SETNE &&
(((LHS.getOpcode() == SPISD::SELECT_ICC ||
LHS.getOpcode() == SPISD::SELECT_XCC) &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
(LHS.getOpcode() == SPISD::SELECT_FCC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
- isa<ConstantSDNode>(LHS.getOperand(0)) &&
- isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
- cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
+ isOneConstant(LHS.getOperand(0)) &&
+ isNullConstant(LHS.getOperand(1))) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
@@ -1821,7 +1914,8 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setHasCalls(true);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
}
// This is one of the absolute code models.
@@ -1872,6 +1966,9 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
+
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2601,6 +2698,17 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
return DAG.getMergeValues(Ops, dl);
}
+static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
+{
+ LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+
+ EVT MemVT = LdNode->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return LowerF128Load(Op, DAG);
+
+ return Op;
+}
+
// Lower a f128 store into two f64 stores.
static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -2645,6 +2753,29 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
+static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
+{
+ SDLoc dl(Op);
+ StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
+
+ EVT MemVT = St->getMemoryVT();
+ if (MemVT == MVT::f128)
+ return LowerF128Store(Op, DAG);
+
+ if (MemVT == MVT::i64) {
+ // Custom handling for i64 stores: turn it into a bitcast and a
+ // v2i32 store.
+ SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
+ SDValue Chain = DAG.getStore(
+ St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(), St->getAlignment(),
+ St->getAAInfo());
+ return Chain;
+ }
+
+ return SDValue();
+}
+
static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS)
&& "invalid opcode");
@@ -2752,7 +2883,7 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
SDValue MulResult = TLI.makeLibCall(DAG,
RTLIB::MUL_I128, WideVT,
- Args, 4, isSigned, dl).first;
+ Args, isSigned, dl).first;
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
MulResult, DAG.getIntPtrConstant(0, dl));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
@@ -2783,7 +2914,6 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
-
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -2818,8 +2948,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
Subtarget);
- case ISD::LOAD: return LowerF128Load(Op, DAG);
- case ISD::STORE: return LowerF128Store(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FADD: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::ADD_F128), 2);
case ISD::FSUB: return LowerF128Op(Op, DAG,
@@ -2921,8 +3051,7 @@ SparcTargetLowering::expandSelectCC(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -3007,7 +3136,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
.addReg(AddrReg).addImm(0);
// Split the basic block MBB before MI and insert the loop block in the hole.
- MachineFunction::iterator MFI = MBB;
+ MachineFunction::iterator MFI = MBB->getIterator();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *MF = MBB->getParent();
MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -3149,9 +3278,12 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
- return std::make_pair(0U, &SP::IntRegsRegClass);
+ if (VT == MVT::v2i32)
+ return std::make_pair(0U, &SP::IntPairRegClass);
+ else
+ return std::make_pair(0U, &SP::IntRegsRegClass);
}
- } else if (!Constraint.empty() && Constraint.size() <= 5
+ } else if (!Constraint.empty() && Constraint.size() <= 5
&& Constraint[0] == '{' && *(Constraint.end()-1) == '}') {
// constraint = '{r<d>}'
// Remove the braces from around the name.
@@ -3227,5 +3359,24 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
getLibcallName(libCall),
1));
return;
+ case ISD::LOAD: {
+ LoadSDNode *Ld = cast<LoadSDNode>(N);
+ // Custom handling only for i64: turn an i64 load into a v2i32 load
+ // and a bitcast.
+ if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64)
+ return;
+
+ SDLoc dl(N);
+ SDValue LoadRes = DAG.getExtLoad(
+ Ld->getExtensionType(), dl, MVT::v2i32,
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+ MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo());
+
+ SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
+ Results.push_back(Res);
+ Results.push_back(LoadRes.getValue(1));
+ return;
+ }
}
}
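For illustration, a minimal standalone sketch (hypothetical helper names, not LLVM code) of the value mapping the i64 load/store lowering above relies on: an i64 is carried in a v2i32 even/odd register pair, and on big-endian SPARC the even half of an LDD/STD pair holds the most-significant word. The extractelt/build_vector patterns added to SparcInstrInfo.td later in this diff express the same even/odd split at the SelectionDAG level.

#include <cassert>
#include <cstdint>

// Hypothetical helpers: split an i64 into the even/odd 32-bit halves of a
// register pair (even = high word on big-endian SPARC) and join them again.
struct IntPair { uint32_t Even, Odd; };

static IntPair splitI64(uint64_t V) {
  return { static_cast<uint32_t>(V >> 32), static_cast<uint32_t>(V) };
}

static uint64_t joinPair(IntPair P) {
  return (static_cast<uint64_t>(P.Even) << 32) | P.Odd;
}

int main() {
  IntPair P = splitI64(0x0123456789abcdefULL);
  assert(P.Even == 0x01234567u && P.Odd == 0x89abcdefu);
  assert(joinPair(P) == 0x0123456789abcdefULL);
  return 0;
}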
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index bbc91a4..4e46709 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -89,6 +89,20 @@ namespace llvm {
return MVT::i32;
}
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return SP::I0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return SP::I1;
+ }
+
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -167,8 +181,8 @@ namespace llvm {
}
void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG) const override;
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const override;
MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
index 25cc652..d51e2cc 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -250,6 +250,7 @@ defm : int_cond_alias<"n", 0b0000>;
defm : int_cond_alias<"ne", 0b1001>;
defm : int_cond_alias<"nz", 0b1001>; // same as ne
defm : int_cond_alias<"e", 0b0001>;
+defm : int_cond_alias<"eq", 0b0001>; // same as e
defm : int_cond_alias<"z", 0b0001>; // same as e
defm : int_cond_alias<"g", 0b1010>;
defm : int_cond_alias<"le", 0b0010>;
@@ -429,6 +430,9 @@ def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>;
def : InstAlias<"flush", (FLUSH), 0>;
+def : MnemonicAlias<"lduw", "ld">, Requires<[HasV9]>;
+def : MnemonicAlias<"lduwa", "lda">, Requires<[HasV9]>;
+
def : MnemonicAlias<"return", "rett">, Requires<[HasV9]>;
def : MnemonicAlias<"addc", "addx">, Requires<[HasV9]>;
@@ -450,3 +454,8 @@ def : InstAlias<"fcmpeq $rs1, $rs2", (V9FCMPEQ FCC0, QFPRegs:$rs1,
QFPRegs:$rs2)>,
Requires<[HasHardQuad]>;
+// signx rd -> sra rd, %g0, rd
+def : InstAlias<"signx $rd", (SRArr IntRegs:$rd, IntRegs:$rd, G0), 0>, Requires<[HasV9]>;
+
+// signx reg, rd -> sra reg, %g0, rd
+def : InstAlias<"signx $rs1, $rd", (SRArr IntRegs:$rd, IntRegs:$rs1, G0), 0>, Requires<[HasV9]>;
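A small sketch (plain C++ assuming V9 semantics, not the alias expansion itself) of why the sra-by-%g0 form implements signx: the 32-bit arithmetic shift with a zero count leaves the low word unchanged and writes it back sign-extended into the 64-bit destination.

#include <cassert>
#include <cstdint>

// Model of the signx alias: take the low 32 bits of the source register and
// write them sign-extended to the 64-bit destination, which is the effect of
// a V9 "sra" with a shift count of %g0 (i.e. zero).
static int64_t signx(uint64_t reg) {
  return static_cast<int64_t>(static_cast<int32_t>(reg & 0xffffffffu));
}

int main() {
  assert(signx(0x00000000fffffffbULL) == -5);  // negative low word
  assert(signx(0x1234567800000007ULL) == 7);   // upper half is discarded
  return 0;
}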
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 6167c53..733027a 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -284,7 +284,9 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned numSubRegs = 0;
unsigned movOpc = 0;
const unsigned *subRegIdx = nullptr;
+ bool ExtraG0 = false;
+ const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd,
@@ -294,7 +296,12 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
.addReg(SrcReg, getKillRegState(KillSrc));
- else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) {
+ subRegIdx = DW_SubRegsIdx;
+ numSubRegs = 2;
+ movOpc = SP::ORrr;
+ ExtraG0 = true;
+ } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) {
@@ -347,7 +354,11 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]);
assert(Dst && Src && "Bad sub-register");
- MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst);
+ if (ExtraG0)
+ MIB.addReg(SP::G0);
+ MIB.addReg(Src);
+ MovMI = MIB.getInstr();
}
// Add implicit super-register defs and kills to the last MovMI.
MovMI->addRegisterDefined(DestReg, TRI);
@@ -365,19 +376,20 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
// On the order of operands here: think "[FrameIdx + 0] = SrcReg".
- if (RC == &SP::I64RegsRegClass)
+ if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ else if (RC == &SP::IntPairRegClass)
+ BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0)
.addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
@@ -403,11 +415,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
if (RC == &SP::I64RegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0)
@@ -415,6 +425,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if (RC == &SP::IntRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
+ else if (RC == &SP::IntPairRegClass)
+ BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
else if (RC == &SP::FPRegsRegClass)
BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0)
.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 3b9e048..ec37c22 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -283,17 +283,32 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
[(set Ty:$dst, (OpNode ADDRri:$addr))]>;
}
+// TODO: Instructions of the LoadASI class are currently asm only; hooking up
+// CodeGen's address spaces to use these is a future task.
+class LoadASI<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
+ RegisterClass RC, ValueType Ty> :
+ F3_1_asi<3, Op3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi),
+ !strconcat(OpcStr, "a [$addr] $asi, $dst"),
+ []>;
+
// LoadA multiclass - As above, but also define alternate address space variant
multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
Load<OpcStr, Op3Val, OpNode, RC, Ty> {
- // TODO: The LD*Arr instructions are currently asm only; hooking up
- // CodeGen's address spaces to use these is a future task.
- def Arr : F3_1_asi<3, LoadAOp3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi),
- !strconcat(OpcStr, "a [$addr] $asi, $dst"),
- []>;
+ def Arr : LoadASI<OpcStr, LoadAOp3Val, OpNode, RC, Ty>;
}
+// The LDSTUB instruction is supported for asm only.
+// It is unlikely that general-purpose code could make use of it.
+// CAS is preferred for sparc v9.
+def LDSTUBrr : F3_1<3, 0b001101, (outs IntRegs:$dst), (ins MEMrr:$addr),
+ "ldstub [$addr], $dst", []>;
+def LDSTUBri : F3_2<3, 0b001101, (outs IntRegs:$dst), (ins MEMri:$addr),
+ "ldstub [$addr], $dst", []>;
+def LDSTUBArr : F3_1_asi<3, 0b011101, (outs IntRegs:$dst),
+ (ins MEMrr:$addr, i8imm:$asi),
+ "ldstuba [$addr] $asi, $dst", []>;
+
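As context for the comment above (illustrative only, using std::atomic rather than the instruction itself): ldstub atomically loads a byte and stores all-ones to it, the classic SPARC test-and-set primitive, so its main consumers are hand-written spinlocks rather than compiled code.

#include <atomic>

// Sketch of the test-and-set idiom ldstub provides: the old value is 0 when
// the lock was free and 0xff when it was already held.
static std::atomic<unsigned char> LockByte{0};

static void spin_lock() {
  while (LockByte.exchange(0xff, std::memory_order_acquire) != 0) {
    // spin until the holder clears the byte
  }
}

static void spin_unlock() {
  LockByte.store(0, std::memory_order_release);
}

int main() {
  spin_lock();
  spin_unlock();
  return 0;
}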
// Store multiclass - Define both Reg+Reg/Reg+Imm patterns in one shot.
multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
RegisterClass RC, ValueType Ty> {
@@ -307,14 +322,18 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
[(OpNode Ty:$rd, ADDRri:$addr)]>;
}
-multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
+// TODO: Instructions of the StoreASI class are currently asm only; hooking up
+// CodeGen's address spaces to use these is a future task.
+class StoreASI<string OpcStr, bits<6> Op3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
- Store<OpcStr, Op3Val, OpNode, RC, Ty> {
- // TODO: The ST*Arr instructions are currently asm only; hooking up
- // CodeGen's address spaces to use these is a future task.
- def Arr : F3_1_asi<3, StoreAOp3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi),
+ F3_1_asi<3, Op3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi),
!strconcat(OpcStr, "a $rd, [$addr] $asi"),
[]>;
+
+multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
+ SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
+ Store<OpcStr, Op3Val, OpNode, RC, Ty> {
+ def Arr : StoreASI<OpcStr, StoreAOp3Val, OpNode, RC, Ty>;
}
//===----------------------------------------------------------------------===//
@@ -408,15 +427,40 @@ let DecoderMethod = "DecodeLoadInt" in {
defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>;
}
+let DecoderMethod = "DecodeLoadIntPair" in
+ defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>;
+
// Section B.2 - Load Floating-point Instructions, p. 92
-let DecoderMethod = "DecodeLoadFP" in
- defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>;
-let DecoderMethod = "DecodeLoadDFP" in
- defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>;
+let DecoderMethod = "DecodeLoadFP" in {
+ defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>;
+ def LDFArr : LoadASI<"ld", 0b110000, load, FPRegs, f32>,
+ Requires<[HasV9]>;
+}
+let DecoderMethod = "DecodeLoadDFP" in {
+ defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>;
+ def LDDFArr : LoadASI<"ldd", 0b110011, load, DFPRegs, f64>,
+ Requires<[HasV9]>;
+}
let DecoderMethod = "DecodeLoadQFP" in
- defm LDQF : Load<"ldq", 0b100010, load, QFPRegs, f128>,
+ defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
+let DecoderMethod = "DecodeLoadFP" in
+ let Defs = [FSR] in {
+ let rd = 0 in {
+ def LDFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr),
+ "ld [$addr], %fsr", []>;
+ def LDFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr),
+ "ld [$addr], %fsr", []>;
+ }
+ let rd = 1 in {
+ def LDXFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr),
+ "ldx [$addr], %fsr", []>, Requires<[HasV9]>;
+ def LDXFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr),
+ "ldx [$addr], %fsr", []>, Requires<[HasV9]>;
+ }
+ }
+
// Section B.4 - Store Integer Instructions, p. 95
let DecoderMethod = "DecodeStoreInt" in {
defm STB : StoreA<"stb", 0b000101, 0b010101, truncstorei8, IntRegs, i32>;
@@ -424,15 +468,40 @@ let DecoderMethod = "DecodeStoreInt" in {
defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>;
}
+let DecoderMethod = "DecodeStoreIntPair" in
+ defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;
+
// Section B.5 - Store Floating-point Instructions, p. 97
-let DecoderMethod = "DecodeStoreFP" in
+let DecoderMethod = "DecodeStoreFP" in {
defm STF : Store<"st", 0b100100, store, FPRegs, f32>;
-let DecoderMethod = "DecodeStoreDFP" in
- defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>;
+ def STFArr : StoreASI<"st", 0b110100, store, FPRegs, f32>,
+ Requires<[HasV9]>;
+}
+let DecoderMethod = "DecodeStoreDFP" in {
+ defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>;
+ def STDFArr : StoreASI<"std", 0b110111, store, DFPRegs, f64>,
+ Requires<[HasV9]>;
+}
let DecoderMethod = "DecodeStoreQFP" in
- defm STQF : Store<"stq", 0b100110, store, QFPRegs, f128>,
+ defm STQF : StoreA<"stq", 0b100110, 0b110110, store, QFPRegs, f128>,
Requires<[HasV9, HasHardQuad]>;
+let DecoderMethod = "DecodeStoreFP" in
+ let Defs = [FSR] in {
+ let rd = 0 in {
+ def STFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins),
+ "st %fsr, [$addr]", []>;
+ def STFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins),
+ "st %fsr, [$addr]", []>;
+ }
+ let rd = 1 in {
+ def STXFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins),
+ "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
+ def STXFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins),
+ "stx %fsr, [$addr]", []>, Requires<[HasV9]>;
+ }
+ }
+
// Section B.8 - SWAP Register with Memory Instruction
// (Atomic swap)
let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in {
@@ -559,6 +628,10 @@ let Defs = [Y, ICC] in {
defm SMULCC : F3_12np<"smulcc", 0b011011>;
}
+let Defs = [Y, ICC], Uses = [Y, ICC] in {
+ defm MULSCC : F3_12np<"mulscc", 0b100100>;
+}
+
// Section B.19 - Divide Instructions, p. 115
let Uses = [Y], Defs = [Y] in {
defm UDIV : F3_12np<"udiv", 0b001110>;
@@ -1221,8 +1294,8 @@ let Predicates = [HasV9] in {
// the top 32-bits before using it. To do this clearing, we use a SRLri X,0.
let rs1 = 0 in
def POPCrr : F3_1<2, 0b101110,
- (outs IntRegs:$dst), (ins IntRegs:$src),
- "popc $src, $dst", []>, Requires<[HasV9]>;
+ (outs IntRegs:$rd), (ins IntRegs:$rs2),
+ "popc $rs2, $rd", []>, Requires<[HasV9]>;
def : Pat<(ctpop i32:$src),
(POPCrr (SRLri $src, 0))>;
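The clearing step can be checked with a short sketch (plain C++ using a compiler builtin, not the pattern itself): POPC counts bits across the whole 64-bit register, so a 32-bit ctpop must first zero the upper word, which is exactly what the srl-by-0 in the pattern provides.

#include <cassert>

// Illustrative 32-bit popcount on a 64-bit register value: clear the top
// 32 bits first (the job of SRLri $src, 0), then count.
static int ctpop32(unsigned long long reg) {
  return __builtin_popcountll(reg & 0xffffffffULL);
}

int main() {
  assert(ctpop32(0xffffffff00000003ULL) == 2);  // upper bits ignored
  assert(ctpop32(0x00000000ffffffffULL) == 32);
  return 0;
}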
@@ -1254,6 +1327,25 @@ let hasSideEffects = 1 in {
}
}
+
+// Section A.43 - Read Privileged Register Instructions
+let Predicates = [HasV9] in {
+let rs2 = 0 in
+ def RDPR : F3_1<2, 0b101010,
+ (outs IntRegs:$rd), (ins PRRegs:$rs1),
+ "rdpr $rs1, $rd", []>;
+}
+
+// Section A.62 - Write Privileged Register Instructions
+let Predicates = [HasV9] in {
+ def WRPRrr : F3_1<2, 0b110010,
+ (outs PRRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2),
+ "wrpr $rs1, $rs2, $rd", []>;
+ def WRPRri : F3_2<2, 0b110010,
+ (outs PRRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13),
+ "wrpr $rs1, $simm13, $rd", []>;
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
@@ -1327,6 +1419,18 @@ def : Pat<(i32 (atomic_load ADDRri:$src)), (LDri ADDRri:$src)>;
def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>;
def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>;
+// extract_vector
+def : Pat<(extractelt (v2i32 IntPair:$Rn), 0),
+ (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>;
+def : Pat<(extractelt (v2i32 IntPair:$Rn), 1),
+ (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>;
+
+// build_vector
+def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even),
+ (i32 IntRegs:$a2), sub_odd)>;
+
include "SparcInstr64Bit.td"
include "SparcInstrVIS.td"
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index 9667bc0..da31783 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -75,6 +75,18 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(SP::G6);
Reserved.set(SP::G7);
+ // Also reserve the register pair aliases covering the above
+ // registers, with the same conditions.
+ Reserved.set(SP::G0_G1);
+ if (ReserveAppRegisters)
+ Reserved.set(SP::G2_G3);
+ if (ReserveAppRegisters || !Subtarget.is64Bit())
+ Reserved.set(SP::G4_G5);
+
+ Reserved.set(SP::O6_O7);
+ Reserved.set(SP::I6_I7);
+ Reserved.set(SP::G6_G7);
+
// Unaliased double registers are not available in non-V9 targets.
if (!Subtarget.isV9()) {
for (unsigned n = 0; n != 16; ++n) {
@@ -158,21 +170,15 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-
- // Addressable stack objects are accessed using neg. offsets from %fp
MachineFunction &MF = *MI.getParent()->getParent();
const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
- int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MI.getOperand(FIOperandNum + 1).getImm() +
- Subtarget.getStackPointerBias();
- SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
- unsigned FramePtr = SP::I6;
- if (FuncInfo->isLeafProc()) {
- // Use %sp and adjust offset if needed.
- FramePtr = SP::O6;
- int stackSize = MF.getFrameInfo()->getStackSize();
- Offset += (stackSize) ? Subtarget.getAdjustedFrameSize(stackSize) : 0 ;
- }
+ const SparcFrameLowering *TFI = getFrameLowering(MF);
+
+ unsigned FrameReg;
+ int Offset;
+ Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg);
+
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) {
if (MI.getOpcode() == SP::STQFri) {
@@ -182,8 +188,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64);
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
- .addReg(FramePtr).addImm(0).addReg(SrcEvenReg);
- replaceFI(MF, II, *StMI, dl, 0, Offset, FramePtr);
+ .addReg(FrameReg).addImm(0).addReg(SrcEvenReg);
+ replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
MI.setDesc(TII.get(SP::STDFri));
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
@@ -194,8 +200,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64);
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
- .addReg(FramePtr).addImm(0);
- replaceFI(MF, II, *StMI, dl, 1, Offset, FramePtr);
+ .addReg(FrameReg).addImm(0);
+ replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
MI.setDesc(TII.get(SP::LDDFri));
MI.getOperand(0).setReg(DestOddReg);
@@ -203,7 +209,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
}
- replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FramePtr);
+ replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg);
}
@@ -211,3 +217,25 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
+// Sparc has no architectural need for stack realignment support,
+// except that LLVM unfortunately currently implements overaligned
+// stack objects by depending upon stack realignment support.
+// If that ever changes, this can probably be deleted.
+bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ if (!TargetRegisterInfo::canRealignStack(MF))
+ return false;
+
+ // Sparc always has a fixed frame pointer register, so we don't need to
+ // worry about reserving it. [Even if we don't have a frame pointer for
+ // our frame, it still cannot be used for other things, or register
+ // window traps will cause trouble.]
+
+ // If there's a reserved call frame, we can use SP to access locals.
+ if (getFrameLowering(MF)->hasReservedCallFrame(MF))
+ return true;
+
+ // Otherwise, we'd need a base pointer, but those aren't implemented
+ // for SPARC at the moment.
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 764a894..32075b1 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -42,8 +42,10 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS = nullptr) const;
- // Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
+
+ bool canRealignStack(const MachineFunction &MF) const override;
+
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index db8a7e8..cca9463 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -32,6 +32,12 @@ def sub_odd64 : SubRegIndex<64, 64>;
// Ri - 32-bit integer registers
class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>;
+// Rdi - pairs of 32-bit integer registers
+class Rdi<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> {
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
+ let CoveredBySubRegs = 1;
+}
// Rf - 32-bit floating-point registers
class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>;
@@ -54,6 +60,8 @@ def ICC : SparcCtrlReg<0, "ICC">; // This represents icc and xcc in 64-bit code.
foreach I = 0-3 in
def FCC#I : SparcCtrlReg<I, "FCC"#I>;
+def FSR : SparcCtrlReg<0, "FSR">; // Floating-point state register.
+
// Y register
def Y : SparcCtrlReg<0, "Y">, DwarfRegNum<[64]>;
// Ancillary state registers (implementation defined)
@@ -94,6 +102,22 @@ def PSR : SparcCtrlReg<0, "PSR">;
def WIM : SparcCtrlReg<0, "WIM">;
def TBR : SparcCtrlReg<0, "TBR">;
+def TPC : SparcCtrlReg<0, "TPC">;
+def TNPC : SparcCtrlReg<1, "TNPC">;
+def TSTATE : SparcCtrlReg<2, "TSTATE">;
+def TT : SparcCtrlReg<3, "TT">;
+def TICK : SparcCtrlReg<4, "TICK">;
+def TBA : SparcCtrlReg<5, "TBA">;
+def PSTATE : SparcCtrlReg<6, "PSTATE">;
+def TL : SparcCtrlReg<7, "TL">;
+def PIL : SparcCtrlReg<8, "PIL">;
+def CWP : SparcCtrlReg<9, "CWP">;
+def CANSAVE : SparcCtrlReg<10, "CANSAVE">;
+def CANRESTORE : SparcCtrlReg<11, "CANRESTORE">;
+def CLEANWIN : SparcCtrlReg<12, "CLEANWIN">;
+def OTHERWIN : SparcCtrlReg<13, "OTHERWIN">;
+def WSTATE : SparcCtrlReg<14, "WSTATE">;
+
// Integer registers
def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
@@ -217,6 +241,24 @@ def Q13 : Rq<21, "F52", [D26, D27]>;
def Q14 : Rq<25, "F56", [D28, D29]>;
def Q15 : Rq<29, "F60", [D30, D31]>;
+// Aliases of the integer registers used for LDD/STD double-word operations
+def G0_G1 : Rdi<0, "G0", [G0, G1]>;
+def G2_G3 : Rdi<2, "G2", [G2, G3]>;
+def G4_G5 : Rdi<4, "G4", [G4, G5]>;
+def G6_G7 : Rdi<6, "G6", [G6, G7]>;
+def O0_O1 : Rdi<8, "O0", [O0, O1]>;
+def O2_O3 : Rdi<10, "O2", [O2, O3]>;
+def O4_O5 : Rdi<12, "O4", [O4, O5]>;
+def O6_O7 : Rdi<14, "O6", [O6, O7]>;
+def L0_L1 : Rdi<16, "L0", [L0, L1]>;
+def L2_L3 : Rdi<18, "L2", [L2, L3]>;
+def L4_L5 : Rdi<20, "L4", [L4, L5]>;
+def L6_L7 : Rdi<22, "L6", [L6, L7]>;
+def I0_I1 : Rdi<24, "I0", [I0, I1]>;
+def I2_I3 : Rdi<26, "I2", [I2, I3]>;
+def I4_I5 : Rdi<28, "I4", [I4, I5]>;
+def I6_I7 : Rdi<30, "I6", [I6, I7]>;
+
// Register classes.
//
// FIXME: the register order should be defined in terms of the preferred
@@ -231,6 +273,13 @@ def IntRegs : RegisterClass<"SP", [i32, i64], 32,
(sequence "L%u", 0, 7),
(sequence "O%u", 0, 7))>;
+// Should be in the same order as IntRegs.
+def IntPair : RegisterClass<"SP", [v2i32], 64,
+ (add I0_I1, I2_I3, I4_I5, I6_I7,
+ G0_G1, G2_G3, G4_G5, G6_G7,
+ L0_L1, L2_L3, L4_L5, L6_L7,
+ O0_O1, O2_O3, O4_O5, O6_O7)>;
+
// Register class for 64-bit mode, with a 64-bit spill slot size.
// These are the same as the 32-bit registers, so TableGen will consider this
// to be a sub-class of IntRegs. That works out because requiring a 64-bit
@@ -252,3 +301,8 @@ def ASRRegs : RegisterClass<"SP", [i32], 32,
(add Y, (sequence "ASR%u", 1, 31))> {
let isAllocatable = 0;
}
+
+// Privileged Registers
+def PRRegs : RegisterClass<"SP", [i64], 64,
+ (add TPC, TNPC, TSTATE, TT, TICK, TBA, PSTATE, TL, PIL, CWP,
+ CANSAVE, CANRESTORE, CLEANWIN, OTHERWIN, WSTATE)>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index d69da40..d701594 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -64,7 +64,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
frameSize += 128;
// Frames with calls must also reserve space for 6 outgoing arguments
// whether they are used or not. LowerCall_64 takes care of that.
- assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned");
+ frameSize = RoundUpToAlignment(frameSize, 16);
} else {
// Emit the correct save instruction based on the number of bytes in
// the frame. Minimum stack frame size according to V8 ABI is:
@@ -81,3 +81,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
}
return frameSize;
}
+
+bool SparcSubtarget::enableMachineScheduler() const {
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index 9d21911..e2fd2f0 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -60,6 +60,8 @@ public:
return &TSInfo;
}
+ bool enableMachineScheduler() const override;
+
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
bool isVIS2() const { return IsVIS2; }
@@ -85,7 +87,6 @@ public:
/// returns adjusted framesize which includes space for register window
/// spills and arguments.
int getAdjustedFrameSize(int stackSize) const;
-
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 3aa4c6b..9c995bf 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -349,7 +349,6 @@ class SystemZAsmParser : public MCTargetAsmParser {
#include "SystemZGenAsmMatcher.inc"
private:
- MCSubtargetInfo &STI;
MCAsmParser &Parser;
enum RegisterGroup {
RegGR,
@@ -386,14 +385,14 @@ private:
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
public:
- SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+ SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), Parser(parser) {
+ : MCTargetAsmParser(Options, sti), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
// Override MCTargetAsmParser.
@@ -533,14 +532,16 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
}
// Parse a register of group Group. If Regs is nonnull, use it to map
-// the raw register number to LLVM numbering, with zero entries indicating
-// an invalid register. IsAddress says whether the register appears in an
-// address context.
+// the raw register number to LLVM numbering, with zero entries
+// indicating an invalid register. IsAddress says whether the
+// register appears in an address context. Allow the FP group when the
+// RegV group is expected, since the f-prefix yields the FP group even
+// when used with vector instructions.
bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
const unsigned *Regs, bool IsAddress) {
if (parseRegister(Reg))
return true;
- if (Reg.Group != Group)
+ if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
return Error(Reg.StartLoc, "invalid operand for instruction");
if (Regs && Regs[Reg.Num] == 0)
return Error(Reg.StartLoc, "invalid register pair");
@@ -791,7 +792,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (MatchResult) {
case Match_Success:
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, getSTI());
return false;
case Match_MissingFeature: {
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 059ae3f..6444cf8 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -60,15 +60,15 @@ void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
O << '%' << getRegisterName(RegNo);
}
-template<unsigned N>
-void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+template <unsigned N>
+static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
assert(isUInt<N>(Value) && "Invalid uimm argument");
O << Value;
}
-template<unsigned N>
-void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
+template <unsigned N>
+static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
int64_t Value = MI->getOperand(OpNum).getImm();
assert(isInt<N>(Value) && "Invalid simm argument");
O << Value;
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index ba55e68..7ca386f 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -15,7 +15,6 @@
#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/Compiler.h"
namespace llvm {
class MCOperand;
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 5fefa31..2115d44 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -226,7 +226,7 @@ extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCCodeEmitter.
TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget,
- createSystemZMCCodeEmitter);
+ createSystemZMCCodeEmitter);
// Register the MCInstrInfo.
TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
diff --git a/contrib/llvm/lib/Target/SystemZ/README.txt b/contrib/llvm/lib/Target/SystemZ/README.txt
index e089047..cd367d6 100644
--- a/contrib/llvm/lib/Target/SystemZ/README.txt
+++ b/contrib/llvm/lib/Target/SystemZ/README.txt
@@ -52,12 +52,6 @@ We don't use the TEST DATA CLASS instructions.
--
-We could use the generic floating-point forms of LOAD COMPLEMENT,
-LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the
-condition codes. For example, we could use LCDFR instead of LCDBR.
-
---
-
We only use MVC, XC and CLC for constant-length block operations.
We could extend them to variable-length operations too,
using EXECUTE RELATIVE LONG.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3dca7bd..7527311 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -288,7 +288,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()),
getModifierVariantKind(ZCPV->getModifier()),
OutContext);
- uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType());
+ uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType());
OutStreamer->EmitValue(Expr, Size);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
index 44ea1d2..4a6beb6 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
@@ -26,21 +26,6 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV,
return new SystemZConstantPoolValue(GV, Modifier);
}
-unsigned SystemZConstantPoolValue::getRelocationInfo() const {
- switch (Modifier) {
- case SystemZCP::TLSGD:
- case SystemZCP::TLSLDM:
- case SystemZCP::DTPOFF:
- // May require a dynamic relocation.
- return 2;
- case SystemZCP::NTPOFF:
- // May require a relocation, but the relocations are always resolved
- // by the static linker.
- return 1;
- }
- llvm_unreachable("Unknown modifier");
-}
-
int SystemZConstantPoolValue::
getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) {
unsigned AlignMask = Alignment - 1;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
index e5f1bb1..a71b595 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZConstantPoolValue.h
@@ -43,7 +43,6 @@ public:
Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier);
// Override MachineConstantPoolValue.
- unsigned getRelocationInfo() const override;
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 16f9adc..4818ed0 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -37,13 +37,11 @@ namespace {
// instructions.
struct Reference {
Reference()
- : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {}
+ : Def(false), Use(false) {}
Reference &operator|=(const Reference &Other) {
Def |= Other.Def;
- IndirectDef |= Other.IndirectDef;
Use |= Other.Use;
- IndirectUse |= Other.IndirectUse;
return *this;
}
@@ -53,11 +51,6 @@ struct Reference {
// via a sub- or super-register.
bool Def;
bool Use;
-
- // True if the register is defined or used indirectly, by a sub- or
- // super-register.
- bool IndirectDef;
- bool IndirectUse;
};
class SystemZElimCompare : public MachineFunctionPass {
@@ -104,14 +97,12 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) {
return false;
}
-// Return true if any CC result of MI would reflect the value of subreg
-// SubReg of Reg.
-static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
+// Return true if any CC result of MI would reflect the value of Reg.
+static bool resultTests(MachineInstr *MI, unsigned Reg) {
if (MI->getNumOperands() > 0 &&
MI->getOperand(0).isReg() &&
MI->getOperand(0).isDef() &&
- MI->getOperand(0).getReg() == Reg &&
- MI->getOperand(0).getSubReg() == SubReg)
+ MI->getOperand(0).getReg() == Reg)
return true;
switch (MI->getOpcode()) {
@@ -127,30 +118,25 @@ static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) {
case SystemZ::LTEBR:
case SystemZ::LTDBR:
case SystemZ::LTXBR:
- if (MI->getOperand(1).getReg() == Reg &&
- MI->getOperand(1).getSubReg() == SubReg)
+ if (MI->getOperand(1).getReg() == Reg)
return true;
}
return false;
}
-// Describe the references to Reg in MI, including sub- and super-registers.
+// Describe the references to Reg or any of its aliases in MI.
Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
Reference Ref;
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI->getOperand(I);
if (MO.isReg()) {
if (unsigned MOReg = MO.getReg()) {
- if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) {
- if (MO.isUse()) {
+ if (TRI->regsOverlap(MOReg, Reg)) {
+ if (MO.isUse())
Ref.Use = true;
- Ref.IndirectUse |= (MOReg != Reg);
- }
- if (MO.isDef()) {
+ else if (MO.isDef())
Ref.Def = true;
- Ref.IndirectDef |= (MOReg != Reg);
- }
}
}
}
@@ -158,6 +144,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
return Ref;
}
+// Return true if this is a load-and-test which can be optimized the
+// same way as a compare instruction.
+static bool isLoadAndTestAsCmp(MachineInstr *MI) {
+ // If during isel we used a load-and-test as a compare with 0, the
+ // def operand is dead.
+ return ((MI->getOpcode() == SystemZ::LTEBR ||
+ MI->getOpcode() == SystemZ::LTDBR ||
+ MI->getOpcode() == SystemZ::LTXBR) &&
+ MI->getOperand(0).isDead());
+}
+
+// Return the source register of Compare, which is the unknown value
+// being tested.
+static unsigned getCompareSourceReg(MachineInstr *Compare) {
+ unsigned reg = 0;
+ if (Compare->isCompare())
+ reg = Compare->getOperand(0).getReg();
+ else if (isLoadAndTestAsCmp(Compare))
+ reg = Compare->getOperand(1).getReg();
+ assert (reg);
+
+ return reg;
+}
+
// Compare compares the result of MI against zero. If MI is an addition
// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
// and convert the branch to a BRCT(G). Return true on success.
@@ -188,7 +198,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
// We already know that there are no references to the register between
// MI and Compare. Make sure that there are also no references between
// Compare and Branch.
- unsigned SrcReg = Compare->getOperand(0).getReg();
+ unsigned SrcReg = getCompareSourceReg(Compare);
MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
for (++MBBI; MBBI != MBBE; ++MBBI)
if (getRegReferences(MBBI, SrcReg))
@@ -196,16 +206,15 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
// The transformation is OK. Rebuild Branch as a BRCT(G).
MachineOperand Target(Branch->getOperand(2));
- Branch->RemoveOperand(2);
- Branch->RemoveOperand(1);
- Branch->RemoveOperand(0);
+ while (Branch->getNumOperands())
+ Branch->RemoveOperand(0);
Branch->setDesc(TII->get(BRCT));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
.addOperand(Target)
.addReg(SystemZ::CC, RegState::ImplicitDefine);
- MI->removeFromParent();
+ MI->eraseFromParent();
return true;
}
@@ -308,6 +317,10 @@ static bool isCompareZero(MachineInstr *Compare) {
return true;
default:
+
+ if (isLoadAndTestAsCmp(Compare))
+ return true;
+
return (Compare->getNumExplicitOperands() == 2 &&
Compare->getOperand(1).isImm() &&
Compare->getOperand(1).getImm() == 0);
@@ -325,8 +338,7 @@ optimizeCompareZero(MachineInstr *Compare,
return false;
// Search back for CC results that are based on the first operand.
- unsigned SrcReg = Compare->getOperand(0).getReg();
- unsigned SrcSubReg = Compare->getOperand(0).getSubReg();
+ unsigned SrcReg = getCompareSourceReg(Compare);
MachineBasicBlock &MBB = *Compare->getParent();
MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
Reference CCRefs;
@@ -334,7 +346,7 @@ optimizeCompareZero(MachineInstr *Compare,
while (MBBI != MBBE) {
--MBBI;
MachineInstr *MI = MBBI;
- if (resultTests(MI, SrcReg, SrcSubReg)) {
+ if (resultTests(MI, SrcReg)) {
// Try to remove both MI and Compare by converting a branch to BRCT(G).
// We don't care in this case whether CC is modified between MI and
// Compare.
@@ -435,23 +447,21 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
while (MBBI != MBB.begin()) {
MachineInstr *MI = --MBBI;
if (CompleteCCUsers &&
- MI->isCompare() &&
+ (MI->isCompare() || isLoadAndTestAsCmp(MI)) &&
(optimizeCompareZero(MI, CCUsers) ||
fuseCompareAndBranch(MI, CCUsers))) {
++MBBI;
- MI->removeFromParent();
+ MI->eraseFromParent();
Changed = true;
CCUsers.clear();
- CompleteCCUsers = true;
continue;
}
- Reference CCRefs(getRegReferences(MI, SystemZ::CC));
- if (CCRefs.Def) {
+ if (MI->definesRegister(SystemZ::CC)) {
CCUsers.clear();
- CompleteCCUsers = !CCRefs.IndirectDef;
+ CompleteCCUsers = true;
}
- if (CompleteCCUsers && CCRefs.Use)
+ if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers)
CCUsers.push_back(MI);
}
return Changed;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 397de47..e1b20d0 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -48,7 +48,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
SystemZFrameLowering::SystemZFrameLowering()
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
- -SystemZMC::CallFrameSize, 8) {
+ -SystemZMC::CallFrameSize, 8,
+ false /* StackRealignable */) {
// Create a mapping from register number to save slot offset.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
@@ -133,7 +134,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
bool IsVarArg = MF.getFunction()->isVarArg();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ DebugLoc DL;
// Scan the call-saved GPRs and find the bounds of the register spill area.
unsigned LowGPR = 0;
@@ -322,7 +323,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo();
bool HasFP = hasFP(MF);
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc DL;
// The current offset of the stack pointer from the CFA.
int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP;
@@ -394,7 +398,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Add CFI for the this save.
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- int64_t Offset = getFrameIndexOffset(MF, Save.getFrameIdx());
+ unsigned IgnoredFrameReg;
+ int64_t Offset =
+ getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg);
+
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, DwarfReg, SPOffsetFromCFA + Offset));
CFIIndexes.push_back(CFIIndex);
@@ -455,9 +462,14 @@ bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
}
-int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+
+ // Fill in FrameReg output argument.
+ FrameReg = RI->getFrameRegister(MF);
// Start with the offset of FI from the top of the caller-allocated frame
// (i.e. the top of the 160 bytes allocated by the caller). This initial
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 5ade757..46bb6b7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -43,7 +43,8 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool hasFP(const MachineFunction &MF) const override;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 75fd37f..a909309 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -585,7 +585,7 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
if (N.getNode()->getNodeId() == -1 ||
N.getNode()->getNodeId() > Pos->getNodeId()) {
- DAG->RepositionNode(Pos, N.getNode());
+ DAG->RepositionNode(Pos->getIterator(), N.getNode());
N.getNode()->setNodeId(Pos->getNodeId());
}
}
@@ -801,7 +801,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
RxSBG.Input = N.getOperand(0);
return true;
}
-
+
case ISD::ANY_EXTEND:
// Bits above the extended operand are don't-care.
RxSBG.Input = N.getOperand(0);
@@ -818,7 +818,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
return true;
}
// Fall through.
-
+
case ISD::SIGN_EXTEND: {
// Check that the extension bits are don't-care (i.e. are masked out
// by the final mask).
@@ -938,7 +938,23 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
return nullptr;
}
- }
+ }
+
+ // If the RISBG operands require no rotation and just mask the bottom
+ // 8/16 bits, attempt to convert this to an LLC/LLH zero extension.
+ if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) {
+ unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR);
+ if (VT == MVT::i32) {
+ if (Subtarget->hasHighWord())
+ OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux);
+ else
+ OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR);
+ }
+
+ SDValue In = convertTo(DL, VT, RISBG.Input);
+ N = CurDAG->getMachineNode(OpCode, DL, VT, In);
+ return convertTo(DL, VT, SDValue(N, 0)).getNode();
+ }
unsigned Opcode = SystemZ::RISBG;
// Prefer RISBGN if available, since it does not clobber CC.
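A tiny sanity check (plain C++, not LLVM code) of the equivalence the new early-out above exploits: with no rotation, a mask of 0xff or 0xffff is exactly a zero extension of the low 8 or 16 bits, so the cheaper LLC/LLH forms can stand in for RISBG.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t In = 0xdeadbeefu;
  // Mask 0xff, rotate 0  == zero-extend the low byte (LLC).
  assert((In & 0xffu) == static_cast<uint32_t>(static_cast<uint8_t>(In)));
  // Mask 0xffff, rotate 0 == zero-extend the low halfword (LLH).
  assert((In & 0xffffu) == static_cast<uint32_t>(static_cast<uint16_t>(In)));
  return 0;
}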
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 9a753c8..ee73267 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -84,8 +84,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
const SystemZSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- auto &DL = *TM.getDataLayout();
- MVT PtrVT = getPointerTy(DL);
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the register classes.
if (Subtarget.hasHighWord())
@@ -115,8 +114,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
computeRegisterProperties(Subtarget.getRegisterInfo());
// Set up special registers.
- setExceptionPointerRegister(SystemZ::R6D);
- setExceptionSelectorRegister(SystemZ::R7D);
setStackPointerRegisterToSaveRestore(SystemZ::R15D);
// TODO: It may be better to default to latency-oriented scheduling, however
@@ -370,7 +367,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// No special instructions for these.
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
}
}
@@ -776,9 +775,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
}
bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!CI->isTailCall())
- return false;
- return true;
+ return CI->isTailCall();
}
// We do not yet support 128-bit single-element vector types. If the user
@@ -939,8 +936,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getIntPtrConstant(4, DL));
ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI), false,
+ false, false, 0);
}
// Convert the value of the argument register into the value that's
@@ -976,9 +973,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
&SystemZ::FP64BitRegClass);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
- MachinePointerInfo::getFixedStack(FI),
+ MachinePointerInfo::getFixedStack(MF, FI),
false, false, 0);
-
}
// Join the stores, which are independent of one another.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
@@ -1060,9 +1056,9 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ MemOpChains.push_back(DAG.getStore(
+ Chain, DL, ArgValue, SpillSlot,
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, 0));
ArgValue = SpillSlot;
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
@@ -1607,8 +1603,8 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
} else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
if (Value > Mask)
return;
- assert(C.ICmpType == SystemZICMP::Any &&
- "Signedness shouldn't matter here.");
+ // If the constant is in range, we can use any comparison.
+ C.ICmpType = SystemZICMP::Any;
} else
return;
@@ -2439,7 +2435,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
}
// If there was a non-zero offset that we didn't fold, create an explicit
@@ -2499,7 +2496,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
}
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(Node, DAG);
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2529,9 +2528,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
// Call __tls_get_offset to retrieve the offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
@@ -2544,9 +2544,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
// Call __tls_get_offset to retrieve the module base offset.
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
@@ -2562,9 +2563,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
- DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- DTPOffset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ DTPOffset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), DTPOffset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
break;
@@ -2575,8 +2577,8 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
SystemZII::MO_INDNTPOFF);
Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getGOT(),
+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
false, false, false, 0);
break;
}
@@ -2587,9 +2589,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
Offset = DAG.getConstantPool(CPV, PtrVT, 8);
- Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
- Offset, MachinePointerInfo::getConstantPool(),
- false, false, false, 0);
+ Offset = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Offset,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, 0);
break;
}
}
@@ -2628,10 +2631,10 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue Result;
if (CP->isMachineConstantPoolEntry())
Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
- CP->getAlignment());
+ CP->getAlignment());
else
Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
- CP->getAlignment(), CP->getOffset());
+ CP->getAlignment(), CP->getOffset());
// Use LARL to load the address of the constant pool entry.
return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
@@ -2736,17 +2739,37 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ bool RealignOpt = !DAG.getMachineFunction().getFunction()->
+ hasFnAttribute("no-realign-stack");
+
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
+ SDValue Align = Op.getOperand(2);
SDLoc DL(Op);
+ // If the user has set the "no-realign-stack" function attribute, ignore
+ // alloca alignments.
+ uint64_t AlignVal = (RealignOpt ?
+ dyn_cast<ConstantSDNode>(Align)->getZExtValue() : 0);
+
+ uint64_t StackAlign = TFI->getStackAlignment();
+ uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
+ uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
+
unsigned SPReg = getStackPointerRegisterToSaveRestore();
+ SDValue NeededSpace = Size;
// Get a reference to the stack pointer.
SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
+ // Add extra space for alignment if needed.
+ if (ExtraAlignSpace)
+ NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+
// Get the new stack pointer value.
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);
+ SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
// Copy the new stack pointer back.
Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
@@ -2757,6 +2780,16 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
+ // Dynamically realign if needed.
+ if (RequiredAlign > StackAlign) {
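+    // Advance past the extra space and then round down to the requested
+    // alignment boundary.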
+ Result =
+ DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
+ DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
+ Result =
+ DAG.getNode(ISD::AND, DL, MVT::i64, Result,
+ DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
+ }
+
SDValue Ops[2] = { Result, Chain };
return DAG.getMergeValues(Ops, DL);
}
@@ -2837,7 +2870,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
} else if (DAG.ComputeNumSignBits(Op1) > 32) {
Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
Opcode = SystemZISD::SDIVREM32;
- } else
+ } else
Opcode = SystemZISD::SDIVREM64;
// DSG(F) takes a 64-bit dividend, so the even register in the GR128
@@ -3247,8 +3280,8 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Op->getNumValues() == 1)
return CC;
assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
- Glued, CC);
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued,
+ CC);
}
unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -3890,7 +3923,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
GS.addUndef();
} else {
GS.add(SDValue(), ResidueOps.size());
- ResidueOps.push_back(Op);
+ ResidueOps.push_back(BVN->getOperand(I));
}
}
@@ -3901,7 +3934,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
// Create the BUILD_VECTOR for the remaining elements, if any.
if (!ResidueOps.empty()) {
while (ResidueOps.size() < NumElements)
- ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
+ ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
for (auto &Op : GS.Ops) {
if (!Op.getNode()) {
Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
@@ -4204,7 +4237,7 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue
SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const {
+ unsigned UnpackHigh) const {
SDValue PackedOp = Op.getOperand(0);
EVT OutVT = Op.getValueType();
EVT InVT = PackedOp.getValueType();
@@ -4566,9 +4599,9 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
}
return Op;
} else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
- canTreatAsByteVector(Op.getValueType()) &&
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ canTreatAsByteVector(Op.getValueType()) &&
canTreatAsByteVector(Op.getOperand(0).getValueType())) {
// Make sure that only the unextended bits are significant.
EVT ExtVT = Op.getValueType();
@@ -4579,14 +4612,14 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
unsigned SubByte = Byte % ExtBytesPerElement;
unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
if (SubByte < MinSubByte ||
- SubByte + BytesPerElement > ExtBytesPerElement)
- break;
+ SubByte + BytesPerElement > ExtBytesPerElement)
+ break;
// Get the byte offset of the unextended element
Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
// ...then add the byte offset relative to that element.
Byte += SubByte - MinSubByte;
if (Byte % BytesPerElement != 0)
- break;
+ break;
Op = Op.getOperand(0);
Index = Byte / BytesPerElement;
Force = true;
@@ -5611,6 +5644,31 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
return MBB;
}
+MachineBasicBlock *
+SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const {
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+
+ unsigned SrcReg = MI->getOperand(0).getReg();
+
+ // Create new virtual register of the same class as source.
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ unsigned DstReg = MRI->createVirtualRegister(RC);
+
+ // Replace pseudo with a normal load-and-test that models the def as
+ // well.
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
+ .addReg(SrcReg);
+ MI->eraseFromParent();
+
+ return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -5858,6 +5916,13 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
case SystemZ::TBEGINC:
return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
+ case SystemZ::LTEBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
+ case SystemZ::LTDBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
+ case SystemZ::LTXBRCompare_VecPseudo:
+ return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
+
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 07ff251..391636e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -409,6 +409,20 @@ public:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return SystemZ::R6D;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return SystemZ::R7D;
+ }
+
MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const
override;
@@ -481,7 +495,7 @@ private:
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
- unsigned UnpackHigh) const;
+ unsigned UnpackHigh) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
@@ -530,6 +544,10 @@ private:
MachineBasicBlock *MBB,
unsigned Opcode,
bool NoFloat) const;
+ MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode) const;
+
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
index 464f79a..5a1c874 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -35,11 +35,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
if (MCID.mayStore())
Flags |= MachineMemOperand::MOStore;
int64_t Offset = 0;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI), Offset),
- Flags, MFFrame->getObjectSize(FI),
- MFFrame->getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
+ MFFrame->getObjectSize(FI), MFFrame->getObjectAlignment(FI));
return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 27fbd7d..0cb2672 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -46,15 +46,28 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
}
-// Note that the comparison against zero operation is not available if we
-// have vector support, since load-and-test instructions will partially
-// clobber the target (vector) register.
+// Note that LTxBRCompare is not available if we have vector support,
+// since load-and-test instructions will partially clobber the target
+// (vector) register.
let Predicates = [FeatureNoVector] in {
defm : CompareZeroFP<LTEBRCompare, FP32>;
defm : CompareZeroFP<LTDBRCompare, FP64>;
defm : CompareZeroFP<LTXBRCompare, FP128>;
}
+// When vector support is available, use a normal load-and-test for
+// comparisons against zero (via a pseudo to simplify instruction selection).
+let Defs = [CC], usesCustomInserter = 1 in {
+ def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
+ def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
+ def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
+}
+let Predicates = [FeatureVector] in {
+ defm : CompareZeroFP<LTEBRCompare_VecPseudo, FP32>;
+ defm : CompareZeroFP<LTDBRCompare_VecPseudo, FP64>;
+ defm : CompareZeroFP<LTXBRCompare_VecPseudo, FP128>;
+}
+
// Moves between 64-bit integer and floating-point registers.
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>;
@@ -238,26 +251,46 @@ let Predicates = [FeatureFPExtension] in {
// Unary arithmetic
//===----------------------------------------------------------------------===//
+// We prefer generic instructions during isel, because they do not
+// clobber CC and therefore give the scheduler more freedom. In cases
+// where the CC is actually useful, the SystemZElimCompare pass will try to
+// convert generic instructions into opcodes that also set CC. Note
+// that lcdf / lpdf / lndf only affect the sign bit, and can therefore
+// be used with fp32 as well. This could be done for fp128, in which
+// case the operands would have to be tied.
+
// Negation (Load Complement).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>;
- def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>;
+ def LCEBR : UnaryRRE<"lceb", 0xB303, null_frag, FP32, FP32>;
+ def LCDBR : UnaryRRE<"lcdb", 0xB313, null_frag, FP64, FP64>;
def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>;
}
+// Generic form, which does not set CC.
+def LCDFR : UnaryRRE<"lcdf", 0xB373, fneg, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LCDFR_32 : UnaryRRE<"lcdf", 0xB373, fneg, FP32, FP32>;
// Absolute value (Load Positive).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>;
- def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>;
+ def LPEBR : UnaryRRE<"lpeb", 0xB300, null_frag, FP32, FP32>;
+ def LPDBR : UnaryRRE<"lpdb", 0xB310, null_frag, FP64, FP64>;
def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>;
}
+// Generic form, which does not set CC.
+def LPDFR : UnaryRRE<"lpdf", 0xB370, fabs, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LPDFR_32 : UnaryRRE<"lpdf", 0xB370, fabs, FP32, FP32>;
// Negative absolute value (Load Negative).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>;
- def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>;
+ def LNEBR : UnaryRRE<"lneb", 0xB301, null_frag, FP32, FP32>;
+ def LNDBR : UnaryRRE<"lndb", 0xB311, null_frag, FP64, FP64>;
def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>;
}
+// Generic form, which does not set CC.
+def LNDFR : UnaryRRE<"lndf", 0xB371, fnabs, FP64, FP64>;
+let isCodeGenOnly = 1 in
+ def LNDFR_32 : UnaryRRE<"lndf", 0xB371, fnabs, FP32, FP32>;
// Square root.
def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>;
@@ -414,6 +447,6 @@ let Defs = [CC], CCValues = 0xF in {
// Peepholes
//===----------------------------------------------------------------------===//
-def : Pat<(f32 fpimmneg0), (LCEBR (LZER))>;
-def : Pat<(f64 fpimmneg0), (LCDBR (LZDR))>;
+def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>;
+def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>;
def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 71eb998..01f4cde 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2381,6 +2381,7 @@ multiclass StringRRE<string mnemonic, bits<16> opcode,
def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2),
(ins GR64:$R1src, GR64:$R2src),
mnemonic#"\t$R1, $R2", []> {
+ let Uses = [R0L];
let Constraints = "$R1 = $R1src, $R2 = $R2src";
let DisableEncoding = "$R1src, $R2src";
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5d4a34f..e6b5fc8 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -69,6 +69,11 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
MachineOperand &LowOffsetOp = MI->getOperand(2);
LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
+ // Clear the kill flags for the base and index registers in the first
+ // instruction.
+ EarlierMI->getOperand(1).setIsKill(false);
+ EarlierMI->getOperand(3).setIsKill(false);
+
// Set the opcodes.
unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
@@ -111,7 +116,7 @@ void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode,
}
// MI is a three-operand RIE-style pseudo instruction. Replace it with
-// LowOpcode3 if the registers are both low GR32s, otherwise use a move
+// LowOpcodeK if the registers are both low GR32s, otherwise use a move
// followed by HighOpcode or LowOpcode, depending on whether the target
// is a high or low GR32.
void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
@@ -129,6 +134,7 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode,
MI->getOperand(1).isKill());
MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
MI->getOperand(1).setReg(DestReg);
+ MI->tieOperands(0, 1);
}
}
@@ -486,11 +492,8 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
const MachineRegisterInfo *MRI) const {
assert(!SrcReg2 && "Only optimizing constant comparisons so far");
bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
- if (Value == 0 &&
- !IsLogical &&
- removeIPMBasedCompare(Compare, SrcReg, MRI, &RI))
- return true;
- return false;
+ return Value == 0 && !IsLogical &&
+ removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
// If Opcode is a move that has a conditional variant, return that variant,
@@ -505,16 +508,13 @@ static unsigned getConditionalMove(unsigned Opcode) {
bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const {
unsigned Opcode = MI->getOpcode();
- if (STI.hasLoadStoreOnCond() &&
- getConditionalMove(Opcode))
- return true;
- return false;
+ return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode);
}
bool SystemZInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
// For now only convert single instructions.
return NumCycles == 1;
}
@@ -524,7 +524,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumCyclesT, unsigned ExtraPredCyclesT,
MachineBasicBlock &FMBB,
unsigned NumCyclesF, unsigned ExtraPredCyclesF,
- const BranchProbability &Probability) const {
+ BranchProbability Probability) const {
// For now avoid converting mutually-exclusive cases.
return false;
}
@@ -548,11 +548,10 @@ PredicateInstruction(MachineInstr *MI, ArrayRef<MachineOperand> Pred) const {
return false;
}
-void
-SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64),
@@ -590,13 +589,10 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-void
-SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill,
- int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void SystemZInstrInfo::storeRegToStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ bool isKill, int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Callers may expect a single instruction, so keep 128-bit moves
@@ -604,15 +600,14 @@ SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned LoadOpcode, StoreOpcode;
getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode))
- .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+ .addReg(SrcReg, getKillRegState(isKill)),
+ FrameIdx);
}
-void
-SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+void SystemZInstrInfo::loadRegFromStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ int FrameIdx, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Callers may expect a single instruction, so keep 128-bit moves
@@ -681,7 +676,8 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LiveVariables *LV) const {
MachineInstr *MI = MBBI;
MachineBasicBlock *MBB = MI->getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned Opcode = MI->getOpcode();
unsigned NumOps = MI->getNumOperands();
@@ -708,14 +704,19 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
if (ThreeOperandOpcode >= 0) {
- MachineInstrBuilder MIB =
- BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode))
- .addOperand(Dest);
+ // Create three address instruction without adding the implicit
+ // operands. Those will instead be copied over from the original
+ // instruction by the loop below.
+ MachineInstrBuilder MIB(*MF,
+ MF->CreateMachineInstr(get(ThreeOperandOpcode),
+ MI->getDebugLoc(), /*NoImplicit=*/true));
+ MIB.addOperand(Dest);
// Keep the kill state, but drop the tied flag.
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
// Keep the remaining operands as-is.
for (unsigned I = 2; I < NumOps; ++I)
MIB.addOperand(MI->getOperand(I));
+ MBB->insert(MI, MIB);
return finishConvertToThreeAddress(MI, MIB, LV);
}
}
@@ -1191,6 +1192,12 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
case SystemZ::LER: return SystemZ::LTEBR;
case SystemZ::LDR: return SystemZ::LTDBR;
case SystemZ::LXR: return SystemZ::LTXBR;
+ case SystemZ::LCDFR: return SystemZ::LCDBR;
+ case SystemZ::LPDFR: return SystemZ::LPDBR;
+ case SystemZ::LNDFR: return SystemZ::LNDBR;
+ case SystemZ::LCDFR_32: return SystemZ::LCEBR;
+ case SystemZ::LPDFR_32: return SystemZ::LPEBR;
+ case SystemZ::LNDFR_32: return SystemZ::LNEBR;
// On zEC12 we prefer to use RISBGN. But if there is a chance to
// actually use the condition code, we may turn it back into RISGB.
// Note that RISBG is not really a "load-and-test" instruction,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 31c9db2..d9094ba 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -159,12 +159,12 @@ public:
bool isPredicable(MachineInstr *MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumCyclesT, unsigned ExtraPredCyclesT,
MachineBasicBlock &FMBB,
unsigned NumCyclesF, unsigned ExtraPredCyclesF,
- const BranchProbability &Probability) const override;
+ BranchProbability Probability) const override;
bool PredicateInstruction(MachineInstr *MI,
ArrayRef<MachineOperand> Pred) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 820f30b..b9f2eb5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -397,7 +397,7 @@ let mayLoad = 1, mayStore = 1 in
defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
// String moves.
-let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in
+let mayLoad = 1, mayStore = 1, Defs = [CC] in
defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
//===----------------------------------------------------------------------===//
@@ -424,7 +424,7 @@ let hasSideEffects = 0 in {
def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>;
}
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>;
+ def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR32>;
// Match 32-to-64-bit sign extensions in which the source is already
// in a 64-bit register.
@@ -490,7 +490,7 @@ def : Pat<(and GR64:$src, 0xffffffff),
def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>,
Requires<[FeatureHighWord]>;
def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>;
-def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>,
+def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>,
Requires<[FeatureHighWord]>;
// 32-bit extensions from 16-bit memory. LLHMux expands to LLH or LLHH,
@@ -498,7 +498,7 @@ def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>,
def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>,
Requires<[FeatureHighWord]>;
def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>;
-def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GR32, 2>,
+def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>,
Requires<[FeatureHighWord]>;
def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>;
@@ -1147,7 +1147,7 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
def CLFIMux : CompareRIPseudo<z_ucmp, GRX32, uimm32>,
Requires<[FeatureHighWord]>;
def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>;
- def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GR32, uimm32>,
+ def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>,
Requires<[FeatureHighWord]>;
def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>;
@@ -1185,7 +1185,7 @@ let mayLoad = 1, Defs = [CC] in
defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
// String comparison.
-let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+let mayLoad = 1, Defs = [CC] in
defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
// Test under mask.
@@ -1459,9 +1459,29 @@ let usesCustomInserter = 1 in {
}
// Search a block of memory for a character.
-let mayLoad = 1, Defs = [CC], Uses = [R0L] in
+let mayLoad = 1, Defs = [CC] in
defm SRST : StringRRE<"srst", 0xb25e, z_search_string>;
+// Other instructions for inline assembly
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2),
+ "stck\t$BD2",
+ []>;
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STCKF : InstS<0xB27C, (outs), (ins bdaddr12only:$BD2),
+ "stckf\t$BD2",
+ []>;
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STCKE : InstS<0xB278, (outs), (ins bdaddr12only:$BD2),
+ "stcke\t$BD2",
+ []>;
+let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
+ def STFLE : InstS<0xB2B0, (outs), (ins bdaddr12only:$BD2),
+ "stfle\t$BD2",
+ []>;
+
+
+
//===----------------------------------------------------------------------===//
// Peepholes.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
index 00572d0..1a7c0d7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//== SystemZMachineFuctionInfo.cpp - SystemZ machine function info-*- C++ -*-=//
+//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 34fc36d..f4a517b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=//
+//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index dc7bd25..6fd24e3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -69,8 +69,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// Decompose the frame index into a base and offset.
int FrameIndex = MI->getOperand(FIOperandNum).getIndex();
- unsigned BasePtr = getFrameRegister(MF);
- int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) +
+ unsigned BasePtr;
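+  // getFrameIndexReference also selects the base register used to address
+  // the frame index.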
+ int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) +
MI->getOperand(FIOperandNum + 1).getImm());
// Special handling of dbg_value instructions.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 85aa0a6..0d8b08b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -282,4 +282,5 @@ def v128any : TypedReg<untyped, VR128>;
// The 2-bit condition code field of the PSW. Every register named in an
// inline asm needs a class associated with it.
def CC : SystemZReg<"cc">;
-def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
+let isAllocatable = 0 in
+ def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index d1a17c5..846edd5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -16,6 +16,8 @@
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -35,19 +37,16 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
private:
- bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
- unsigned LLIxL, unsigned LLIxH);
+ bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH);
bool shortenOn0(MachineInstr &MI, unsigned Opcode);
bool shortenOn01(MachineInstr &MI, unsigned Opcode);
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
+ bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
-
- // LowGPRs[I] has bit N set if LLVM register I includes the low
- // word of GPR N. HighGPRs is the same for the high word.
- unsigned LowGPRs[SystemZ::NUM_TARGET_REGS];
- unsigned HighGPRs[SystemZ::NUM_TARGET_REGS];
+ const TargetRegisterInfo *TRI;
+ LivePhysRegs LiveRegs;
};
char SystemZShortenInst::ID = 0;
@@ -58,33 +57,31 @@ FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
}
SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), LowGPRs(), HighGPRs() {
- // Set up LowGPRs and HighGPRs.
- for (unsigned I = 0; I < 16; ++I) {
- LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I;
- LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
- HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I;
- HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I;
- if (unsigned GR128 = SystemZMC::GR128Regs[I]) {
- LowGPRs[GR128] |= 3 << I;
- HighGPRs[GR128] |= 3 << I;
- }
- }
+ : MachineFunctionPass(ID), TII(nullptr) {}
+
+// Tie operands if MI has become a two-address instruction.
+static void tieOpsIfNeeded(MachineInstr &MI) {
+ if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ !MI.getOperand(0).isTied())
+ MI.tieOperands(0, 1);
}
// MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH
// are the halfword immediate loads for the same word. Try to use one of them
-// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high
-// words referenced by LLVM register X while LiveOther is the mask of low
-// words that are currently live, and vice versa.
-bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap,
- unsigned LiveOther, unsigned LLIxL,
- unsigned LLIxH) {
+// instead of IIxF.
+bool SystemZShortenInst::shortenIIF(MachineInstr &MI,
+ unsigned LLIxL, unsigned LLIxH) {
unsigned Reg = MI.getOperand(0).getReg();
- assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
- unsigned GPRs = GPRMap[Reg];
- assert(GPRs != 0 && "Register must be a GPR");
- if (GPRs & LiveOther)
+ // The new opcode will clear the other half of the GR64 reg, so
+ // cancel if that is live.
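+  // Work out which GR64 register contains Reg and which of its 32-bit
+  // halves is the other one.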
+ unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ?
+ SystemZ::subreg_h32 : SystemZ::subreg_l32);
+ unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ?
+ SystemZ::subreg_h32 : SystemZ::subreg_l32);
+ unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx,
+ &SystemZ::GR64BitRegClass);
+ unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx);
+ if (LiveRegs.contains(OtherReg))
return false;
uint64_t Imm = MI.getOperand(1).getImm();
@@ -123,12 +120,26 @@ bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
}
// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
-// 4-bit encoding and if operands 0 and 1 are tied.
+// 4-bit encoding and if operands 0 and 1 are tied. Also ties op 0
+// with op 1, if MI becomes 2-address.
bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
MI.setDesc(TII->get(Opcode));
+ tieOpsIfNeeded(MI);
+ return true;
+ }
+ return false;
+}
+
+// Calls shortenOn001 if CC is not live. A CC def operand is added in
+// case of success.
+bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI,
+ unsigned Opcode) {
+ if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) {
+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
+ .addReg(SystemZ::CC, RegState::ImplicitDefine);
return true;
}
return false;
@@ -164,35 +175,24 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
- // Work out which words are live on exit from the block.
- unsigned LiveLow = 0;
- unsigned LiveHigh = 0;
- for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) {
- for (auto LI = (*SI)->livein_begin(), LE = (*SI)->livein_end();
- LI != LE; ++LI) {
- unsigned Reg = *LI;
- assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
- LiveLow |= LowGPRs[Reg];
- LiveHigh |= HighGPRs[Reg];
- }
- }
+ // Set up the set of live registers at the end of MBB (live out)
+ LiveRegs.clear();
+ LiveRegs.addLiveOuts(&MBB);
// Iterate backwards through the block looking for instructions to change.
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
MachineInstr &MI = *MBBI;
switch (MI.getOpcode()) {
case SystemZ::IILF:
- Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
- SystemZ::LLILH);
+ Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH);
break;
case SystemZ::IIHF:
- Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
- SystemZ::LLIHH);
+ Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
break;
case SystemZ::WFADB:
- Changed |= shortenOn001(MI, SystemZ::ADBR);
+ Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
break;
case SystemZ::WFDDB:
@@ -216,15 +216,15 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
break;
case SystemZ::WFLCDB:
- Changed |= shortenOn01(MI, SystemZ::LCDBR);
+ Changed |= shortenOn01(MI, SystemZ::LCDFR);
break;
case SystemZ::WFLNDB:
- Changed |= shortenOn01(MI, SystemZ::LNDBR);
+ Changed |= shortenOn01(MI, SystemZ::LNDFR);
break;
case SystemZ::WFLPDB:
- Changed |= shortenOn01(MI, SystemZ::LPDBR);
+ Changed |= shortenOn01(MI, SystemZ::LPDFR);
break;
case SystemZ::WFSQDB:
@@ -232,7 +232,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
break;
case SystemZ::WFSDB:
- Changed |= shortenOn001(MI, SystemZ::SDBR);
+ Changed |= shortenOn001AddCC(MI, SystemZ::SDBR);
break;
case SystemZ::WFCDB:
@@ -257,33 +257,17 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
break;
}
- unsigned UsedLow = 0;
- unsigned UsedHigh = 0;
- for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
- MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
- if (MO.isReg()) {
- if (unsigned Reg = MO.getReg()) {
- assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number");
- if (MO.isDef()) {
- LiveLow &= ~LowGPRs[Reg];
- LiveHigh &= ~HighGPRs[Reg];
- } else if (!MO.isUndef()) {
- UsedLow |= LowGPRs[Reg];
- UsedHigh |= HighGPRs[Reg];
- }
- }
- }
- }
- LiveLow |= UsedLow;
- LiveHigh |= UsedHigh;
+ LiveRegs.stepBackward(MI);
}
return Changed;
}
bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
- TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
+ const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = ST.getRegisterInfo();
+ LiveRegs.init(TRI);
bool Changed = false;
for (auto &MBB : F)
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 00cbbd1..f305e85 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -16,6 +16,7 @@
using namespace llvm;
+extern cl::opt<bool> MISchedPostRA;
extern "C" void LLVMInitializeSystemZTarget() {
// Register the target.
RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
@@ -32,7 +33,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
VectorABI = false;
SmallVector<StringRef, 3> Features;
- FS.split(Features, ",", -1, false /* KeepEmpty */);
+ FS.split(Features, ',', -1, false /* KeepEmpty */);
for (auto &Feature : Features) {
if (Feature == "vector" || Feature == "+vector")
VectorABI = true;
@@ -130,6 +131,13 @@ void SystemZPassConfig::addPreSched2() {
}
void SystemZPassConfig::addPreEmitPass() {
+
+ // Do instruction shortening before compare elimination because some
+ // vector instructions will be shortened into opcodes that compare
+ // elimination recognizes.
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
+
// We eliminate comparisons here rather than earlier because some
// transformations can change the set of available CC values and we
// generally want those transformations to have priority. This is
@@ -155,9 +163,17 @@ void SystemZPassConfig::addPreEmitPass() {
// preventing that would be a win or not.
if (getOptLevel() != CodeGenOpt::None)
addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
- if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
+
+ // Do final scheduling after all other optimizations, to get an
+ // optimal input for the decoder (branch relaxation must happen
+ // after block placement).
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (MISchedPostRA)
+ addPass(&PostMachineSchedulerID);
+ else
+ addPass(&PostRASchedulerID);
+ }
}
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
@@ -165,7 +181,7 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(SystemZTTIImpl(this, F));
});
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
index 0a81e1f..1a8f1f7 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -43,6 +43,9 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+
+ bool targetSchedulesPostRAScheduling() const override { return true; };
+
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 5a87df1..5ff5b21 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
//
//===----------------------------------------------------------------------===//
-unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -63,8 +63,8 @@ unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
return 4 * TTI::TCC_Basic;
}
-unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -181,8 +181,8 @@ unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
return SystemZTTIImpl::getIntImmCost(Imm, Ty);
}
-unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 4b80973..9ae736d 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -28,7 +28,7 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
const SystemZTargetLowering *getTLI() const { return TLI; }
public:
- explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F)
+ explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -42,12 +42,11 @@ public:
/// \name Scalar TTI Implementations
/// @{
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty);
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 19b5e2a..a0b0d8f 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,7 +43,6 @@ using namespace llvm;
void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
Ctx = &ctx;
- DL = TM.getDataLayout();
InitMCObjectFileInfo(TM.getTargetTriple(), TM.getRelocationModel(),
TM.getCodeModel(), *Ctx);
}
@@ -107,7 +106,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase(
assert(!Suffix.empty());
SmallString<60> NameStr;
- NameStr += DL->getPrivateGlobalPrefix();
+ NameStr += GV->getParent()->getDataLayout().getPrivateGlobalPrefix();
TM.getNameWithPrefix(NameStr, GV, Mang);
NameStr.append(Suffix.begin(), Suffix.end());
return Ctx->getOrCreateSymbol(NameStr);
@@ -120,7 +119,7 @@ MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol(
}
void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
- const TargetMachine &TM,
+ const DataLayout &,
const MCSymbol *Sym) const {
}
@@ -170,14 +169,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// If the initializer for the global contains something that requires a
// relocation, then we may have to drop this into a writable data section
// even though it is marked const.
- switch (C->getRelocationInfo()) {
- case Constant::NoRelocation:
+ if (!C->needsRelocation()) {
// If the global is required to have a unique address, it can't be put
// into a mergable section: just drop it into the general read-only
// section instead.
if (!GVar->hasUnnamedAddr())
return SectionKind::getReadOnly();
-
+
// If initializer is a null-terminated string, put it in a "cstring"
// section of the right width.
if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
@@ -200,7 +198,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Otherwise, just drop it into a mergable constant section. If we have
// a section for this size, use it, otherwise use the arbitrary sized
// mergable section.
- switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) {
+ switch (GV->getParent()->getDataLayout().getTypeAllocSize(C->getType())) {
case 4: return SectionKind::getMergeableConst4();
case 8: return SectionKind::getMergeableConst8();
case 16: return SectionKind::getMergeableConst16();
@@ -208,20 +206,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getReadOnly();
}
- case Constant::LocalRelocation:
- // In static relocation model, the linker will resolve all addresses, so
- // the relocation entries will actually be constants by the time the app
- // starts up. However, we can't put this into a mergable section, because
- // the linker doesn't take relocations into consideration when it tries to
- // merge entries in the section.
- if (ReloModel == Reloc::Static)
- return SectionKind::getReadOnly();
-
- // Otherwise, the dynamic linker needs to fix it up, put it in the
- // writable data.rel.local section.
- return SectionKind::getReadOnlyWithRelLocal();
-
- case Constant::GlobalRelocations:
+ } else {
// In static relocation model, the linker will resolve all addresses, so
// the relocation entries will actually be constants by the time the app
// starts up. However, we can't put this into a mergable section, because
@@ -242,17 +227,11 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// globals together onto fewer pages, improving the locality of the dynamic
// linker.
if (ReloModel == Reloc::Static)
- return SectionKind::getDataNoRel();
-
- switch (C->getRelocationInfo()) {
- case Constant::NoRelocation:
- return SectionKind::getDataNoRel();
- case Constant::LocalRelocation:
- return SectionKind::getDataRelLocal();
- case Constant::GlobalRelocations:
- return SectionKind::getDataRel();
- }
- llvm_unreachable("Invalid relocation");
+ return SectionKind::getData();
+
+ if (C->needsRelocation())
+ return SectionKind::getData();
+ return SectionKind::getData();
}
/// This method computes the appropriate section to emit the specified global
@@ -273,7 +252,8 @@ TargetLoweringObjectFile::SectionForGlobal(const GlobalValue *GV,
MCSection *TargetLoweringObjectFile::getSectionForJumpTable(
const Function &F, Mangler &Mang, const TargetMachine &TM) const {
- return getSectionForConstant(SectionKind::getReadOnly(), /*C=*/nullptr);
+ return getSectionForConstant(F.getParent()->getDataLayout(),
+ SectionKind::getReadOnly(), /*C=*/nullptr);
}
bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
@@ -296,9 +276,8 @@ bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
/// Given a mergable constant with the specified size and relocation
/// information, return a section that it should be placed in.
-MCSection *
-TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isReadOnly() && ReadOnlySection != nullptr)
return ReadOnlySection;
@@ -345,7 +324,7 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol
}
void TargetLoweringObjectFile::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const {
- Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM) const {
+ Mang.getNameWithPrefix(OutName, GV, /*CannotUsePrivateLabel=*/false);
}
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 83174c2..850c93c 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -150,24 +150,11 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const {
}
TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(F.getParent()->getDataLayout());
});
}
-static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
- const MCSection &Section) {
- if (!AsmInfo.isSectionAtomizableBySymbols(Section))
- return true;
-
- // If it is not dead stripped, it is safe to use private labels.
- const MCSectionMachO &SMO = cast<MCSectionMachO>(Section);
- if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP))
- return true;
-
- return false;
-}
-
void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
const GlobalValue *GV, Mangler &Mang,
bool MayAlwaysUsePrivate) const {
@@ -177,11 +164,8 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
Mang.getNameWithPrefix(Name, GV, false);
return;
}
- SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, *this);
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
- const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this);
- bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection);
- TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this);
+ TLOF->getNameWithPrefix(Name, GV, Mang, *this);
}
MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 7199235..f82566c 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -32,17 +32,25 @@
using namespace llvm;
-inline TargetMachine *unwrap(LLVMTargetMachineRef P) {
- return reinterpret_cast<TargetMachine*>(P);
+namespace llvm {
+// Friend of TargetMachine; accesses legacy APIs that are private in C++.
+struct C_API_PRIVATE_ACCESS {
+ static const DataLayout &getDataLayout(const TargetMachine &T) {
+ return T.getDataLayout();
+ }
+};
+}
+
+static TargetMachine *unwrap(LLVMTargetMachineRef P) {
+ return reinterpret_cast<TargetMachine *>(P);
}
-inline Target *unwrap(LLVMTargetRef P) {
+static Target *unwrap(LLVMTargetRef P) {
return reinterpret_cast<Target*>(P);
}
-inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
- return
- reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine*>(P));
+static LLVMTargetMachineRef wrap(const TargetMachine *P) {
+ return reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine *>(P));
}
-inline LLVMTargetRef wrap(const Target * P) {
+static LLVMTargetRef wrap(const Target * P) {
return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P));
}
@@ -69,16 +77,16 @@ LLVMTargetRef LLVMGetTargetFromName(const char *Name) {
LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T,
char **ErrorMessage) {
std::string Error;
-
+
*T = wrap(TargetRegistry::lookupTarget(TripleStr, Error));
-
+
if (!*T) {
if (ErrorMessage)
*ErrorMessage = strdup(Error.c_str());
return 1;
}
-
+
return 0;
}
@@ -145,10 +153,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
CM, OL));
}
-
-void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) {
- delete unwrap(T);
-}
+void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); }
LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) {
const Target* target = &(unwrap(T)->getTarget());
@@ -170,8 +175,9 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
return strdup(StringRep.c_str());
}
+/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
- return wrap(unwrap(T)->getDataLayout());
+ return wrap(&C_API_PRIVATE_ACCESS::getDataLayout(*unwrap(T)));
}
void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
@@ -190,14 +196,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
std::string error;
- const DataLayout *td = TM->getDataLayout();
-
- if (!td) {
- error = "No DataLayout in TargetMachine";
- *ErrorMessage = strdup(error.c_str());
- return true;
- }
- Mod->setDataLayout(*td);
+ Mod->setDataLayout(TM->createDataLayout());
TargetMachine::CodeGenFileType ft;
switch (codegen) {
@@ -239,7 +238,6 @@ LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
SmallString<0> CodeString;
raw_svector_ostream OStream(CodeString);
bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage);
- OStream.flush();
StringRef Data = OStream.str();
*OutMemBuf =
diff --git a/contrib/llvm/lib/Target/TargetRecip.cpp b/contrib/llvm/lib/Target/TargetRecip.cpp
index 42bc487..d41b643 100644
--- a/contrib/llvm/lib/Target/TargetRecip.cpp
+++ b/contrib/llvm/lib/Target/TargetRecip.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
// the key strings for queries and command-line inputs.
// In addition, the command-line interface recognizes the global parameters
// "all", "none", and "default".
-static const char *RecipOps[] = {
+static const char *const RecipOps[] = {
"divd",
"divf",
"vec-divd",
@@ -46,7 +46,7 @@ TargetRecip::TargetRecip() {
RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
}
-static bool parseRefinementStep(const StringRef &In, size_t &Position,
+static bool parseRefinementStep(StringRef In, size_t &Position,
uint8_t &Value) {
const char RefStepToken = ':';
Position = In.find(RefStepToken);
@@ -175,7 +175,7 @@ TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
parseIndividualParams(Args);
}
-bool TargetRecip::isEnabled(const StringRef &Key) const {
+bool TargetRecip::isEnabled(StringRef Key) const {
ConstRecipIter Iter = RecipMap.find(Key);
assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
assert(Iter->second.Enabled != Uninitialized &&
@@ -183,7 +183,7 @@ bool TargetRecip::isEnabled(const StringRef &Key) const {
return Iter->second.Enabled;
}
-unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
+unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
ConstRecipIter Iter = RecipMap.find(Key);
assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
assert(Iter->second.RefinementSteps != Uninitialized &&
@@ -192,7 +192,7 @@ unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
}
/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
+void TargetRecip::setDefaults(StringRef Key, bool Enable,
unsigned RefSteps) {
if (Key == "all") {
for (auto &KV : RecipMap) {
@@ -213,7 +213,7 @@ void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
bool TargetRecip::operator==(const TargetRecip &Other) const {
for (const auto &KV : RecipMap) {
- const StringRef &Op = KV.first;
+ StringRef Op = KV.first;
const RecipParams &RP = KV.second;
const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
if (RP.RefinementSteps != OtherRP.RefinementSteps)
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index fbb985a..7ce3a00 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -13,7 +13,9 @@
//===----------------------------------------------------------------------===//
#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -21,11 +23,13 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include <cctype>
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+#include "WebAssemblyGenAsmWriter.inc"
+
WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
@@ -33,11 +37,93 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
- llvm_unreachable("TODO: implement printRegName");
+ assert(RegNo != WebAssemblyFunctionInfo::UnusedReg);
+ // Note that there's an implicit get_local/set_local here!
+ OS << "$" << RegNo;
}
void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot,
- const MCSubtargetInfo &STI) {
- llvm_unreachable("TODO: implement printInst");
+ const MCSubtargetInfo & /*STI*/) {
+ // Print the instruction (this uses the AsmStrings from the .td files).
+ printInstruction(MI, OS);
+
+ // Print any additional variadic operands.
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ if (Desc.isVariadic())
+ for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) {
+ if (i != 0)
+ OS << ", ";
+ printOperand(MI, i, OS);
+ }
+
+ // Print any added annotation.
+ printAnnotation(OS, Annot);
+}
+
+static std::string toString(const APFloat &FP) {
+ static const size_t BufBytes = 128;
+ char buf[BufBytes];
+ if (FP.isNaN())
+ assert((FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) ||
+ FP.bitwiseIsEqual(
+ APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) &&
+ "convertToHexString handles neither SNaN nor NaN payloads");
+ // Use C99's hexadecimal floating-point representation.
+ auto Written = FP.convertToHexString(
+ buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven);
+ (void)Written;
+ assert(Written != 0);
+ assert(Written < BufBytes);
+ return buf;
+}
+
+void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned WAReg = Op.getReg();
+ if (int(WAReg) >= 0)
+ printRegName(O, WAReg);
+ else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs())
+ O << "$pop" << (WAReg & INT32_MAX);
+ else if (WAReg != WebAssemblyFunctionInfo::UnusedReg)
+ O << "$push" << (WAReg & INT32_MAX);
+ else
+ O << "$discard";
+ // Add a '=' suffix if this is a def.
+ if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
+ O << '=';
+ } else if (Op.isImm()) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::PARAM:
+ case WebAssembly::RESULT:
+ case WebAssembly::LOCAL:
+ O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm()));
+ break;
+ default:
+ O << Op.getImm();
+ break;
+ }
+ } else if (Op.isFPImm())
+ O << toString(APFloat(Op.getFPImm()));
+ else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ Op.getExpr()->print(O, &MAI);
+ }
+}
+
+const char *llvm::WebAssembly::TypeToString(MVT Ty) {
+ switch (Ty.SimpleTy) {
+ case MVT::i32:
+ return "i32";
+ case MVT::i64:
+ return "i64";
+ case MVT::f32:
+ return "f32";
+ case MVT::f64:
+ return "f64";
+ default:
+ llvm_unreachable("unsupported type");
+ }
}
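A hedged illustration of the operand-naming scheme that printOperand above implements: a non-negative WAReg prints as a plain numbered register, while negative encodings denote stackified operands, printed as $pop for uses and $push for defs, with $discard for a def whose result is unused. The standalone sketch below mirrors that logic; NumDefs stands in for MCInstrDesc::getNumDefs(), and UnusedReg is assumed here to be the all-ones sentinel.

    #include <cstdint>
    #include <string>

    static std::string operandName(uint32_t WAReg, unsigned OpNo,
                                   unsigned NumDefs,
                                   uint32_t UnusedReg = ~0u) {
      if (int32_t(WAReg) >= 0)
        return "$" + std::to_string(WAReg);                 // numbered register
      if (OpNo >= NumDefs)
        return "$pop" + std::to_string(WAReg & INT32_MAX);  // stackified use
      if (WAReg != UnusedReg)
        return "$push" + std::to_string(WAReg & INT32_MAX); // stackified def
      return "$discard";                                    // unused result
    }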
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index 70fcef2..39a16f5 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -16,14 +16,13 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineValueType.h"
namespace llvm {
-class MCOperand;
class MCSubtargetInfo;
-class WebAssemblyInstPrinter : public MCInstPrinter {
+class WebAssemblyInstPrinter final : public MCInstPrinter {
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
@@ -31,8 +30,21 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
const MCSubtargetInfo &STI) override;
+
+  // Used by tblgen code.
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
};
+namespace WebAssembly {
+
+const char *TypeToString(MVT Ty);
+
+} // end namespace WebAssembly
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
new file mode 100644
index 0000000..b158ccb
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -0,0 +1,103 @@
+//===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblyAsmBackend class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class WebAssemblyAsmBackend final : public MCAsmBackend {
+ bool Is64Bit;
+
+public:
+ explicit WebAssemblyAsmBackend(bool Is64Bit)
+ : MCAsmBackend(), Is64Bit(Is64Bit) {}
+ ~WebAssemblyAsmBackend() override {}
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+
+ // No instruction requires relaxation
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override {
+ // We currently just use the generic fixups in MCFixup.h and don't have any
+ // target-specific fixups.
+ return 0;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {}
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+};
+
+bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
+ MCObjectWriter *OW) const {
+ if (Count == 0)
+ return true;
+
+ // FIXME: Do something.
+ return false;
+}
+
+void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+ const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
+ unsigned NumBytes = RoundUpToAlignment(Info.TargetSize, 8);
+ if (!Value)
+ return; // Doesn't change encoding.
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+MCObjectWriter *
+WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
+ return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0);
+}
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT,
+ StringRef CPU) {
+ return new WebAssemblyAsmBackend(TT.isArch64Bit());
+}
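A hedged worked example of the little-endian OR-in loop used by applyFixup above. MCFixupKindInfo::TargetSize is a bit count, so the sketch derives a byte count as (TargetSize + 7) / 8; the fixup width, offset, and value are invented for illustration.

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned TargetSizeBits = 32;                  // hypothetical 32-bit fixup
      unsigned NumBytes = (TargetSizeBits + 7) / 8;  // 4 bytes
      uint64_t Value = 0x1234;                       // already shifted into place
      unsigned Offset = 2;                           // fixup offset in the fragment
      uint8_t Data[8] = {0};

      // Same masking loop as WebAssemblyAsmBackend::applyFixup.
      for (unsigned i = 0; i != NumBytes; ++i)
        Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);

      for (uint8_t B : Data)
        std::printf("%02x ", B);                     // 00 00 34 12 00 00 00 00
      std::printf("\n");
      return 0;
    }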
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
new file mode 100644
index 0000000..c47a3d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
@@ -0,0 +1,54 @@
+//===-- WebAssemblyELFObjectWriter.cpp - WebAssembly ELF Writer -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file handles ELF-specific object emission, converting LLVM's
+/// internal fixups into the appropriate relocations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+namespace {
+class WebAssemblyELFObjectWriter final : public MCELFObjectTargetWriter {
+public:
+ WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI);
+
+protected:
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
+};
+} // end anonymous namespace
+
+// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF
+// writing seriously, we should email generic-abi@googlegroups.com and ask
+// for our own ELF machine code (EM_* value).
+WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit,
+ uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE,
+ /*HasRelocationAddend=*/true) {}
+
+unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ // FIXME: Do we need our own relocs?
+ return Fixup.getKind();
+}
+
+MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW =
+ new WebAssemblyELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 55346f7..d261779 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
- PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit();
+ PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
@@ -41,9 +41,6 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
COMMDirectiveAlignmentIsInBytes = false;
LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
- HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
-
SupportsDebugInformation = true;
// For now, WebAssembly does not support exceptions.
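The PointerSize change above fixes a subtle bug: Triple::isArch64Bit() returns bool, so the old code assigned 1 (or 0) rather than a pointer size in bytes. A minimal sketch of the difference, with a plain bool standing in for the triple query:

    #include <cstdio>

    int main() {
      bool IsArch64Bit = true;          // stand-in for T.isArch64Bit()
      int Before = IsArch64Bit;         // bool converts to 1, not 8
      int After = IsArch64Bit ? 8 : 4;  // the patched computation
      std::printf("before=%d after=%d\n", Before, After);  // before=1 after=8
      return 0;
    }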
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
index d2b8fb7..2dcf2cd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -15,13 +15,13 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
class Triple;
-class WebAssemblyMCAsmInfo final : public MCAsmInfo {
+class WebAssemblyMCAsmInfo final : public MCAsmInfoELF {
public:
explicit WebAssemblyMCAsmInfo(const Triple &T);
~WebAssemblyMCAsmInfo() override;
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
new file mode 100644
index 0000000..7c6c79e
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -0,0 +1,100 @@
+//=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the WebAssemblyMCCodeEmitter class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+namespace {
+class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
+ const MCRegisterInfo &MRI;
+
+public:
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri,
+ MCContext &)
+ : MRI(mri) {}
+
+ ~WebAssemblyMCCodeEmitter() override {}
+
+ /// TableGen'erated function for getting the binary encoding for an
+ /// instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Return binary encoding of operand. If the machine operand requires
+ /// relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+};
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx);
+}
+
+unsigned WebAssemblyMCCodeEmitter::getMachineOpValue(
+ const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return MRI.getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ assert(MO.isExpr());
+
+ assert(MO.getExpr()->getKind() == MCExpr::SymbolRef);
+
+ assert(false && "FIXME: not implemented yet");
+
+ return 0;
+}
+
+void WebAssemblyMCCodeEmitter::encodeInstruction(
+ const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(false && "FIXME: not implemented yet");
+}
+
+// Encode WebAssembly Memory Operand
+uint64_t
+WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(false && "FIXME: not implemented yet");
+ return 0;
+}
+
+#include "WebAssemblyGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 224aa77..14cd295 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -26,25 +26,40 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mc-target-desc"
+#define GET_INSTRINFO_MC_DESC
+#include "WebAssemblyGenInstrInfo.inc"
+
#define GET_SUBTARGETINFO_MC_DESC
#include "WebAssemblyGenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "WebAssemblyGenRegisterInfo.inc"
-static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI,
+static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/,
const Triple &TT) {
- MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT);
- return MAI;
+ return new WebAssemblyMCAsmInfo(TT);
+}
+
+static MCInstrInfo *createWebAssemblyMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitWebAssemblyMCInstrInfo(X);
+ return X;
+}
+
+static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
static MCInstPrinter *
-createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant,
+createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant,
const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI) {
- if (SyntaxVariant == 0 || SyntaxVariant == 1)
- return new WebAssemblyInstPrinter(MAI, MII, MRI);
- return nullptr;
+ assert(SyntaxVariant == 0);
+ return new WebAssemblyInstPrinter(MAI, MII, MRI);
}
// Force static initialization.
@@ -53,7 +68,19 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo);
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo);
+
+ // Register the object streamer
+ TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter);
+
+ // Register the MC code emitter
+ TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter);
+
+ // Register the ASM Backend
+ TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend);
}
}
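A hedged sketch of how a client reaches the MC components registered above. Header locations and factory signatures shifted between LLVM releases, so treat the calls as an approximation of the 3.8-era API; the triple string is an assumption.

    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Support/TargetSelect.h"
    #include <memory>
    #include <string>

    static void queryWebAssemblyMC() {
      llvm::InitializeAllTargetMCs();  // runs LLVMInitializeWebAssemblyTargetMC
      std::string Err;
      const llvm::Target *T =
          llvm::TargetRegistry::lookupTarget("wasm32-unknown-unknown", Err);
      if (!T)
        return;  // target not configured into this build
      std::unique_ptr<llvm::MCRegisterInfo> MRI(
          T->createMCRegInfo("wasm32-unknown-unknown"));
      std::unique_ptr<llvm::MCInstrInfo> MII(T->createMCInstrInfo());
      // MRI and MII now come from the factories registered in this file.
      (void)MRI;
      (void)MII;
    }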
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index eebf5b7..e78f73e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -16,7 +16,6 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
-#include <string>
namespace llvm {
@@ -34,13 +33,21 @@ class StringRef;
class Target;
class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheWebAssemblyTarget32;
extern Target TheWebAssemblyTarget64;
+MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+ const Triple &TT, StringRef CPU);
+
+MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint8_t OSABI);
} // end namespace llvm
@@ -50,6 +57,11 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
#define GET_REGINFO_ENUM
#include "WebAssemblyGenRegisterInfo.inc"
+// Defines symbolic names for the WebAssembly instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "WebAssemblyGenInstrInfo.inc"
+
#define GET_SUBTARGETINFO_ENUM
#include "WebAssemblyGenSubtargetInfo.inc"
diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt
index 63e02c4..b97ea45 100644
--- a/contrib/llvm/lib/Target/WebAssembly/README.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/README.txt
@@ -12,6 +12,16 @@ binary encoding of WebAssembly itself:
* https://github.com/WebAssembly/design/blob/master/AstSemantics.md
* https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+The backend is built, tested and archived on the following waterfall:
+ https://build.chromium.org/p/client.wasm.llvm/console
+
+The backend's bringup is done using the GCC torture test suite first since it
+doesn't require C library support. Current known failures are listed in
+known_gcc_test_failures.txt; all other tests should pass, and the waterfall
+will turn red if they don't. Once most of these pass, further testing will use
+LLVM's own test suite. The tests can be run locally using:
+ github.com/WebAssembly/experimental/blob/master/buildbot/torture_test.py
+
Interesting work that remains to be done:
* Write a pass to restructurize irreducible control flow. This needs to be done
before register allocation to be efficient, because it may duplicate basic
@@ -19,8 +29,60 @@ Interesting work that remains to be done:
level. Note that LLVM's GPU code has such a pass, but it linearizes control
flow (e.g. both sides of branches execute and are masked) which is undesirable
for WebAssembly.
-* Basic relooper to expose control flow as an AST.
-* Figure out how to properly use MC for virtual ISAs. This may require some
- refactoring of MC.
+
+//===---------------------------------------------------------------------===//
+
+set_local instructions have a return value. We should (a) model this,
+and (b) write optimizations which take advantage of it. Keep in mind that
+many set_local instructions are implicit!
+
+//===---------------------------------------------------------------------===//
+
+Br, br_if, and tableswitch instructions can support having a value on the
+expression stack across the jump (sometimes). We should (a) model this, and
+(b) extend the stackifier to utilize it.
+
+//===---------------------------------------------------------------------===//
+
+The min/max operators aren't exactly a<b?a:b because of NaN and negative zero
+behavior. The ARM target has the same kind of min/max instructions and has
+implemented optimizations for them; we should do similar optimizations for
+WebAssembly.
+
+//===---------------------------------------------------------------------===//
+
+AArch64 runs SeparateConstOffsetFromGEPPass, followed by EarlyCSE and LICM.
+Would these be useful to run for WebAssembly too? Also, it has an option to
+run SimplifyCFG after running the AtomicExpand pass. Would this be useful for
+us too?
+
+//===---------------------------------------------------------------------===//
+
+When is it profitable to set isAsCheapAsAMove on instructions in WebAssembly?
+
+//===---------------------------------------------------------------------===//
+
+Register stackification uses the EXPR_STACK physical register to impose
+ordering dependencies on instructions with stack operands. This is pessimistic;
+we should consider alternate ways to model stack dependencies.
+
+//===---------------------------------------------------------------------===//
+
+Lots of things could be done in WebAssemblyTargetTransformInfo.cpp. Similarly,
+there are numerous optimization-related hooks that can be overridden in
+WebAssemblyTargetLowering.
+
+//===---------------------------------------------------------------------===//
+
+Instead of a separate OptimizeReturned pass, consider preserving the
+"returned" attribute through to MachineInstrs and extending the StoreResults
+pass to do this optimization on calls too. That would also let the
+WebAssemblyPeephole pass clean up dead defs for such calls, as it does for
+stores.
+
+//===---------------------------------------------------------------------===//
+
+Memset/memcpy/memmove should be marked with the "returned" attribute somehow,
+even when they are translated through intrinsics.
//===---------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp
new file mode 100644
index 0000000..9b718ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Relooper.cpp
@@ -0,0 +1,984 @@
+//===-- Relooper.cpp - Top-level interface for WebAssembly ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This implements the Relooper algorithm. This implementation includes
+/// optimizations added since the original academic paper [1] was published.
+///
+/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
+/// Proceedings of the ACM international conference companion on Object
+/// oriented programming systems languages and applications companion
+/// (SPLASH '11). ACM, New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
+/// http://doi.acm.org/10.1145/2048147.2048224
+///
+//===-------------------------------------------------------------------===//
+
+#include "Relooper.h"
+#include "WebAssembly.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstring>
+#include <cstdlib>
+#include <functional>
+#include <list>
+#include <stack>
+#include <string>
+
+#define DEBUG_TYPE "relooper"
+
+using namespace llvm;
+using namespace Relooper;
+
+static cl::opt<int> RelooperSplittingFactor(
+ "relooper-splitting-factor",
+ cl::desc(
+ "How much to discount code size when deciding whether to split a node"),
+ cl::init(5));
+
+static cl::opt<unsigned> RelooperMultipleSwitchThreshold(
+ "relooper-multiple-switch-threshold",
+ cl::desc(
+ "How many entries to allow in a multiple before we use a switch"),
+ cl::init(10));
+
+static cl::opt<unsigned> RelooperNestingLimit(
+ "relooper-nesting-limit",
+ cl::desc(
+ "How much nesting is acceptable"),
+ cl::init(20));
+
+
+namespace {
+///
+/// Implements the relooper algorithm for a function's blocks.
+///
+/// Implementation details: The Relooper instance has
+/// ownership of the blocks and shapes, and frees them when done.
+///
+struct RelooperAlgorithm {
+ std::deque<Block *> Blocks;
+ std::deque<Shape *> Shapes;
+ Shape *Root;
+ bool MinSize;
+ int BlockIdCounter;
+ int ShapeIdCounter;
+
+ RelooperAlgorithm();
+ ~RelooperAlgorithm();
+
+ void AddBlock(Block *New, int Id = -1);
+
+ // Calculates the shapes
+ void Calculate(Block *Entry);
+
+ // Sets us to try to minimize size
+ void SetMinSize(bool MinSize_) { MinSize = MinSize_; }
+};
+
+struct RelooperAnalysis final : public FunctionPass {
+ static char ID;
+ RelooperAnalysis() : FunctionPass(ID) {}
+ const char *getPassName() const override { return "relooper"; }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnFunction(Function &F) override;
+};
+}
+
+// RelooperAnalysis
+
+char RelooperAnalysis::ID = 0;
+FunctionPass *llvm::createWebAssemblyRelooper() {
+ return new RelooperAnalysis();
+}
+
+bool RelooperAnalysis::runOnFunction(Function &F) {
+ DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n");
+ RelooperAlgorithm R;
+ // FIXME: remove duplication between relooper's and LLVM's BBs.
+ std::map<const BasicBlock *, Block *> BB2B;
+ std::map<const Block *, const BasicBlock *> B2BB;
+ for (const BasicBlock &BB : F) {
+    // FIXME: getName is wrong here; Code is meant to represent the amount of code.
+ // FIXME: use BranchVarInit for switch.
+ Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr);
+ R.AddBlock(B);
+ assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice");
+ assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice");
+ BB2B[&BB] = B;
+ B2BB[B] = &BB;
+ }
+ for (Block *B : R.Blocks) {
+ const BasicBlock *BB = B2BB[B];
+ for (const BasicBlock *Successor : successors(BB))
+ // FIXME: add branch's Condition and Code below.
+ B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr);
+ }
+ R.Calculate(BB2B[&F.getEntryBlock()]);
+ return false; // Analysis passes don't modify anything.
+}
+
+// Helpers
+
+typedef MapVector<Block *, BlockSet> BlockBlockSetMap;
+typedef std::list<Block *> BlockList;
+
+template <class T, class U>
+static bool contains(const T &container, const U &contained) {
+ return container.count(contained);
+}
+
+
+// Branch
+
+Branch::Branch(const char *ConditionInit, const char *CodeInit)
+ : Ancestor(nullptr), Labeled(true) {
+ // FIXME: move from char* to LLVM data structures
+ Condition = ConditionInit ? strdup(ConditionInit) : nullptr;
+ Code = CodeInit ? strdup(CodeInit) : nullptr;
+}
+
+Branch::~Branch() {
+ // FIXME: move from char* to LLVM data structures
+ free(static_cast<void *>(const_cast<char *>(Condition)));
+ free(static_cast<void *>(const_cast<char *>(Code)));
+}
+
+// Block
+
+Block::Block(const char *CodeInit, const char *BranchVarInit)
+ : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) {
+ // FIXME: move from char* to LLVM data structures
+ Code = strdup(CodeInit);
+ BranchVar = BranchVarInit ? strdup(BranchVarInit) : nullptr;
+}
+
+Block::~Block() {
+ // FIXME: move from char* to LLVM data structures
+ free(static_cast<void *>(const_cast<char *>(Code)));
+ free(static_cast<void *>(const_cast<char *>(BranchVar)));
+}
+
+void Block::AddBranchTo(Block *Target, const char *Condition,
+ const char *Code) {
+ assert(!contains(BranchesOut, Target) &&
+ "cannot add more than one branch to the same target");
+ BranchesOut[Target] = make_unique<Branch>(Condition, Code);
+}
+
+// Relooper
+
+RelooperAlgorithm::RelooperAlgorithm()
+ : Root(nullptr), MinSize(false), BlockIdCounter(1),
+ ShapeIdCounter(0) { // block ID 0 is reserved for clearings
+}
+
+RelooperAlgorithm::~RelooperAlgorithm() {
+ for (auto Curr : Blocks)
+ delete Curr;
+ for (auto Curr : Shapes)
+ delete Curr;
+}
+
+void RelooperAlgorithm::AddBlock(Block *New, int Id) {
+ New->Id = Id == -1 ? BlockIdCounter++ : Id;
+ Blocks.push_back(New);
+}
+
+struct RelooperRecursor {
+ RelooperAlgorithm *Parent;
+ RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
+};
+
+void RelooperAlgorithm::Calculate(Block *Entry) {
+ // Scan and optimize the input
+ struct PreOptimizer : public RelooperRecursor {
+ PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
+ BlockSet Live;
+
+ void FindLive(Block *Root) {
+ BlockList ToInvestigate;
+ ToInvestigate.push_back(Root);
+ while (!ToInvestigate.empty()) {
+ Block *Curr = ToInvestigate.front();
+ ToInvestigate.pop_front();
+ if (contains(Live, Curr))
+ continue;
+ Live.insert(Curr);
+ for (const auto &iter : Curr->BranchesOut)
+ ToInvestigate.push_back(iter.first);
+ }
+ }
+
+ // If a block has multiple entries but no exits, and it is small enough, it
+ // is useful to split it. A common example is a C++ function where
+ // everything ends up at a final exit block and does some RAII cleanup.
+ // Without splitting, we will be forced to introduce labelled loops to
+ // allow reaching the final block
+ void SplitDeadEnds() {
+ unsigned TotalCodeSize = 0;
+ for (const auto &Curr : Live) {
+ TotalCodeSize += strlen(Curr->Code);
+ }
+ BlockSet Splits;
+ BlockSet Removed;
+ for (const auto &Original : Live) {
+ if (Original->BranchesIn.size() <= 1 ||
+ !Original->BranchesOut.empty())
+ continue; // only dead ends, for now
+ if (contains(Original->BranchesOut, Original))
+ continue; // cannot split a looping node
+ if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) >
+ TotalCodeSize / RelooperSplittingFactor)
+ continue; // if splitting increases raw code size by a significant
+ // amount, abort
+ // Split the node (for simplicity, we replace all the blocks, even
+ // though we could have reused the original)
+ DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n");
+ for (const auto &Prior : Original->BranchesIn) {
+ Block *Split = new Block(Original->Code, Original->BranchVar);
+ Parent->AddBlock(Split, Original->Id);
+ Split->BranchesIn.insert(Prior);
+ std::unique_ptr<Branch> Details;
+ Details.swap(Prior->BranchesOut[Original]);
+ Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition,
+ Details->Code);
+ for (const auto &iter : Original->BranchesOut) {
+ Block *Post = iter.first;
+ Branch *Details = iter.second.get();
+ Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition,
+ Details->Code);
+ Post->BranchesIn.insert(Split);
+ }
+ Splits.insert(Split);
+ Removed.insert(Original);
+ }
+ for (const auto &iter : Original->BranchesOut) {
+ Block *Post = iter.first;
+ Post->BranchesIn.remove(Original);
+ }
+ }
+ for (const auto &iter : Splits)
+ Live.insert(iter);
+ for (const auto &iter : Removed)
+ Live.remove(iter);
+ }
+ };
+ PreOptimizer Pre(this);
+ Pre.FindLive(Entry);
+
+ // Add incoming branches from live blocks, ignoring dead code
+ for (unsigned i = 0; i < Blocks.size(); i++) {
+ Block *Curr = Blocks[i];
+ if (!contains(Pre.Live, Curr))
+ continue;
+ for (const auto &iter : Curr->BranchesOut)
+ iter.first->BranchesIn.insert(Curr);
+ }
+
+ if (!MinSize)
+ Pre.SplitDeadEnds();
+
+ // Recursively process the graph
+
+ struct Analyzer : public RelooperRecursor {
+ Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {}
+
+ // Add a shape to the list of shapes in this Relooper calculation
+ void Notice(Shape *New) {
+ New->Id = Parent->ShapeIdCounter++;
+ Parent->Shapes.push_back(New);
+ }
+
+ // Create a list of entries from a block. If LimitTo is provided, only
+ // results in that set will appear
+ void GetBlocksOut(Block *Source, BlockSet &Entries,
+ BlockSet *LimitTo = nullptr) {
+ for (const auto &iter : Source->BranchesOut)
+ if (!LimitTo || contains(*LimitTo, iter.first))
+ Entries.insert(iter.first);
+ }
+
+ // Converts/processes all branchings to a specific target
+ void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor,
+ BlockSet &From) {
+ DEBUG(dbgs() << " Solipsize '" << Target->Code << "' type " << Type
+ << "\n");
+ for (auto iter = Target->BranchesIn.begin();
+ iter != Target->BranchesIn.end();) {
+ Block *Prior = *iter;
+ if (!contains(From, Prior)) {
+ iter++;
+ continue;
+ }
+ std::unique_ptr<Branch> PriorOut;
+ PriorOut.swap(Prior->BranchesOut[Target]);
+ PriorOut->Ancestor = Ancestor;
+ PriorOut->Type = Type;
+ if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor))
+ Multiple->Breaks++; // We are breaking out of this Multiple, so need a
+ // loop
+ iter++; // carefully increment iter before erasing
+ Target->BranchesIn.remove(Prior);
+ Target->ProcessedBranchesIn.insert(Prior);
+ Prior->ProcessedBranchesOut[Target].swap(PriorOut);
+ }
+ }
+
+ Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) {
+ DEBUG(dbgs() << " MakeSimple inner block '" << Inner->Code << "'\n");
+ SimpleShape *Simple = new SimpleShape;
+ Notice(Simple);
+ Simple->Inner = Inner;
+ Inner->Parent = Simple;
+ if (Blocks.size() > 1) {
+ Blocks.remove(Inner);
+ GetBlocksOut(Inner, NextEntries, &Blocks);
+ BlockSet JustInner;
+ JustInner.insert(Inner);
+ for (const auto &iter : NextEntries)
+ Solipsize(iter, Branch::Direct, Simple, JustInner);
+ }
+ return Simple;
+ }
+
+ Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries,
+ BlockSet &NextEntries) {
+      // Find the inner blocks in this loop. Proceed backwards from the
+      // entries until you reach a seen block, collecting as you go.
+ BlockSet InnerBlocks;
+ BlockSet Queue = Entries;
+ while (!Queue.empty()) {
+ Block *Curr = *(Queue.begin());
+ Queue.remove(*Queue.begin());
+ if (!contains(InnerBlocks, Curr)) {
+ // This element is new, mark it as inner and remove from outer
+ InnerBlocks.insert(Curr);
+ Blocks.remove(Curr);
+ // Add the elements prior to it
+ for (const auto &iter : Curr->BranchesIn)
+ Queue.insert(iter);
+ }
+ }
+ assert(!InnerBlocks.empty());
+
+ for (const auto &Curr : InnerBlocks) {
+ for (const auto &iter : Curr->BranchesOut) {
+ Block *Possible = iter.first;
+ if (!contains(InnerBlocks, Possible))
+ NextEntries.insert(Possible);
+ }
+ }
+
+ LoopShape *Loop = new LoopShape();
+ Notice(Loop);
+
+ // Solipsize the loop, replacing with break/continue and marking branches
+ // as Processed (will not affect later calculations)
+ // A. Branches to the loop entries become a continue to this shape
+ for (const auto &iter : Entries)
+ Solipsize(iter, Branch::Continue, Loop, InnerBlocks);
+ // B. Branches to outside the loop (a next entry) become breaks on this
+ // shape
+ for (const auto &iter : NextEntries)
+ Solipsize(iter, Branch::Break, Loop, InnerBlocks);
+ // Finish up
+ Shape *Inner = Process(InnerBlocks, Entries, nullptr);
+ Loop->Inner = Inner;
+ return Loop;
+ }
+
+ // For each entry, find the independent group reachable by it. The
+ // independent group is the entry itself, plus all the blocks it can
+ // reach that cannot be directly reached by another entry. Note that we
+ // ignore directly reaching the entry itself by another entry.
+ // @param Ignore - previous blocks that are irrelevant
+ void FindIndependentGroups(BlockSet &Entries,
+ BlockBlockSetMap &IndependentGroups,
+ BlockSet *Ignore = nullptr) {
+ typedef std::map<Block *, Block *> BlockBlockMap;
+
+ struct HelperClass {
+ BlockBlockSetMap &IndependentGroups;
+ BlockBlockMap Ownership; // For each block, which entry it belongs to.
+ // We have reached it from there.
+
+ HelperClass(BlockBlockSetMap &IndependentGroupsInit)
+ : IndependentGroups(IndependentGroupsInit) {}
+ void InvalidateWithChildren(Block *New) {
+ // Being in the list means you need to be invalidated
+ BlockList ToInvalidate;
+ ToInvalidate.push_back(New);
+ while (!ToInvalidate.empty()) {
+ Block *Invalidatee = ToInvalidate.front();
+ ToInvalidate.pop_front();
+ Block *Owner = Ownership[Invalidatee];
+ // Owner may have been invalidated, do not add to
+ // IndependentGroups!
+ if (contains(IndependentGroups, Owner))
+ IndependentGroups[Owner].remove(Invalidatee);
+ if (Ownership[Invalidatee]) { // may have been seen before and
+ // invalidated already
+ Ownership[Invalidatee] = nullptr;
+ for (const auto &iter : Invalidatee->BranchesOut) {
+ Block *Target = iter.first;
+ BlockBlockMap::iterator Known = Ownership.find(Target);
+ if (Known != Ownership.end()) {
+ Block *TargetOwner = Known->second;
+ if (TargetOwner)
+ ToInvalidate.push_back(Target);
+ }
+ }
+ }
+ }
+ }
+ };
+ HelperClass Helper(IndependentGroups);
+
+ // We flow out from each of the entries, simultaneously.
+ // When we reach a new block, we add it as belonging to the one we got to
+ // it from.
+ // If we reach a new block that is already marked as belonging to someone,
+ // it is reachable by two entries and is not valid for any of them.
+ // Remove it and all it can reach that have been visited.
+
+ // Being in the queue means we just added this item, and
+ // we need to add its children
+ BlockList Queue;
+ for (const auto &Entry : Entries) {
+ Helper.Ownership[Entry] = Entry;
+ IndependentGroups[Entry].insert(Entry);
+ Queue.push_back(Entry);
+ }
+ while (!Queue.empty()) {
+ Block *Curr = Queue.front();
+ Queue.pop_front();
+ Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership
+ // map if we are in the queue
+ if (!Owner)
+ continue; // we have been invalidated meanwhile after being reached
+ // from two entries
+ // Add all children
+ for (const auto &iter : Curr->BranchesOut) {
+ Block *New = iter.first;
+ BlockBlockMap::iterator Known = Helper.Ownership.find(New);
+ if (Known == Helper.Ownership.end()) {
+ // New node. Add it, and put it in the queue
+ Helper.Ownership[New] = Owner;
+ IndependentGroups[Owner].insert(New);
+ Queue.push_back(New);
+ continue;
+ }
+ Block *NewOwner = Known->second;
+ if (!NewOwner)
+ continue; // We reached an invalidated node
+ if (NewOwner != Owner)
+ // Invalidate this and all reachable that we have seen - we reached
+ // this from two locations
+ Helper.InvalidateWithChildren(New);
+ // otherwise, we have the same owner, so do nothing
+ }
+ }
+
+ // Having processed all the interesting blocks, we remain with just one
+ // potential issue:
+ // If a->b, and a was invalidated, but then b was later reached by
+ // someone else, we must invalidate b. To check for this, we go over all
+      // elements in the independent groups; if an element has a parent which
+      // does *not* have the same owner, we must remove it and all its
+      // children.
+
+ for (const auto &iter : Entries) {
+ BlockSet &CurrGroup = IndependentGroups[iter];
+ BlockList ToInvalidate;
+ for (const auto &iter : CurrGroup) {
+ Block *Child = iter;
+ for (const auto &iter : Child->BranchesIn) {
+ Block *Parent = iter;
+ if (Ignore && contains(*Ignore, Parent))
+ continue;
+ if (Helper.Ownership[Parent] != Helper.Ownership[Child])
+ ToInvalidate.push_back(Child);
+ }
+ }
+ while (!ToInvalidate.empty()) {
+ Block *Invalidatee = ToInvalidate.front();
+ ToInvalidate.pop_front();
+ Helper.InvalidateWithChildren(Invalidatee);
+ }
+ }
+
+ // Remove empty groups
+ for (const auto &iter : Entries)
+ if (IndependentGroups[iter].empty())
+ IndependentGroups.erase(iter);
+ }
+
+ Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries,
+ BlockBlockSetMap &IndependentGroups, Shape *Prev,
+ BlockSet &NextEntries) {
+ bool Fused = isa<SimpleShape>(Prev);
+ MultipleShape *Multiple = new MultipleShape();
+ Notice(Multiple);
+ BlockSet CurrEntries;
+ for (auto &iter : IndependentGroups) {
+ Block *CurrEntry = iter.first;
+ BlockSet &CurrBlocks = iter.second;
+ // Create inner block
+ CurrEntries.clear();
+ CurrEntries.insert(CurrEntry);
+ for (const auto &CurrInner : CurrBlocks) {
+ // Remove the block from the remaining blocks
+ Blocks.remove(CurrInner);
+ // Find new next entries and fix branches to them
+ for (auto iter = CurrInner->BranchesOut.begin();
+ iter != CurrInner->BranchesOut.end();) {
+ Block *CurrTarget = iter->first;
+ auto Next = iter;
+ Next++;
+ if (!contains(CurrBlocks, CurrTarget)) {
+ NextEntries.insert(CurrTarget);
+ Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks);
+ }
+ iter = Next; // increment carefully because Solipsize can remove us
+ }
+ }
+ Multiple->InnerMap[CurrEntry->Id] =
+ Process(CurrBlocks, CurrEntries, nullptr);
+ // If we are not fused, then our entries will actually be checked
+ if (!Fused)
+ CurrEntry->IsCheckedMultipleEntry = true;
+ }
+ // Add entries not handled as next entries, they are deferred
+ for (const auto &Entry : Entries)
+ if (!contains(IndependentGroups, Entry))
+ NextEntries.insert(Entry);
+ // The multiple has been created, we can decide how to implement it
+ if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) {
+ Multiple->UseSwitch = true;
+ Multiple->Breaks++; // switch captures breaks
+ }
+ return Multiple;
+ }
+
+ // Main function.
+ // Process a set of blocks with specified entries, returns a shape
+ // The Make* functions receive a NextEntries. If they fill it with data,
+ // those are the entries for the ->Next block on them, and the blocks
+ // are what remains in Blocks (which Make* modify). In this way
+ // we avoid recursing on Next (imagine a long chain of Simples, if we
+ // recursed we could blow the stack).
+ Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) {
+ BlockSet *Entries = &InitialEntries;
+ BlockSet TempEntries[2];
+ int CurrTempIndex = 0;
+ BlockSet *NextEntries;
+ Shape *Ret = nullptr;
+
+ auto Make = [&](Shape *Temp) {
+ if (Prev)
+ Prev->Next = Temp;
+ if (!Ret)
+ Ret = Temp;
+ Prev = Temp;
+ Entries = NextEntries;
+ };
+
+ while (1) {
+ CurrTempIndex = 1 - CurrTempIndex;
+ NextEntries = &TempEntries[CurrTempIndex];
+ NextEntries->clear();
+
+ if (Entries->empty())
+ return Ret;
+ if (Entries->size() == 1) {
+ Block *Curr = *(Entries->begin());
+ if (Curr->BranchesIn.empty()) {
+ // One entry, no looping ==> Simple
+ Make(MakeSimple(Blocks, Curr, *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+ // One entry, looping ==> Loop
+ Make(MakeLoop(Blocks, *Entries, *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+
+      // More than one entry: try to peel off, via a Multiple, groups of
+      // independent blocks each reachable from only one entry. It is important
+      // to prefer Multiples over Loops since the former is more performant.
+ BlockBlockSetMap IndependentGroups;
+ FindIndependentGroups(*Entries, IndependentGroups);
+
+ if (!IndependentGroups.empty()) {
+ // We can handle a group in a multiple if its entry cannot be reached
+ // by another group.
+ // Note that it might be reachable by itself - a loop. But that is
+ // fine, we will create a loop inside the multiple block (which
+ // is the performant order to do it).
+ for (auto iter = IndependentGroups.begin();
+ iter != IndependentGroups.end();) {
+ Block *Entry = iter->first;
+ BlockSet &Group = iter->second;
+ auto curr = iter++; // iterate carefully, we may delete
+ for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin();
+ iterBranch != Entry->BranchesIn.end(); iterBranch++) {
+ Block *Origin = *iterBranch;
+ if (!contains(Group, Origin)) {
+ // Reached from outside the group, so we cannot handle this
+ IndependentGroups.erase(curr);
+ break;
+ }
+ }
+ }
+
+ // As an optimization, if we have 2 independent groups, and one is a
+ // small dead end, we can handle only that dead end.
+ // The other then becomes a Next - without nesting in the code and
+ // recursion in the analysis.
+ // TODO: if the larger is the only dead end, handle that too
+ // TODO: handle >2 groups
+          // TODO: handle not just dead ends, but also groups that do not
+          // branch to the NextEntries. However, must be careful there since
+          // we create a
+ // Next, and that Next can prevent eliminating a break (since we no
+ // longer naturally reach the same place), which may necessitate a
+ // one-time loop, which makes the unnesting pointless.
+ if (IndependentGroups.size() == 2) {
+ // Find the smaller one
+ auto iter = IndependentGroups.begin();
+ Block *SmallEntry = iter->first;
+ auto SmallSize = iter->second.size();
+ iter++;
+ Block *LargeEntry = iter->first;
+ auto LargeSize = iter->second.size();
+ if (SmallSize != LargeSize) { // ignore the case where they are
+ // identical - keep things symmetrical
+ // there
+ if (SmallSize > LargeSize) {
+ Block *Temp = SmallEntry;
+ SmallEntry = LargeEntry;
+ LargeEntry = Temp; // Note: we did not flip the Sizes too, they
+ // are now invalid. TODO: use the smaller
+ // size as a limit?
+ }
+ // Check if dead end
+ bool DeadEnd = true;
+ BlockSet &SmallGroup = IndependentGroups[SmallEntry];
+ for (const auto &Curr : SmallGroup) {
+ for (const auto &iter : Curr->BranchesOut) {
+ Block *Target = iter.first;
+ if (!contains(SmallGroup, Target)) {
+ DeadEnd = false;
+ break;
+ }
+ }
+ if (!DeadEnd)
+ break;
+ }
+ if (DeadEnd)
+ IndependentGroups.erase(LargeEntry);
+ }
+ }
+
+ if (!IndependentGroups.empty())
+ // Some groups removable ==> Multiple
+ Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev,
+ *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+ // No independent groups, must be loopable ==> Loop
+ Make(MakeLoop(Blocks, *Entries, *NextEntries));
+ if (NextEntries->empty())
+ return Ret;
+ continue;
+ }
+ }
+ };
+
+ // Main
+
+ BlockSet AllBlocks;
+ for (const auto &Curr : Pre.Live) {
+ AllBlocks.insert(Curr);
+ }
+
+ BlockSet Entries;
+ Entries.insert(Entry);
+ Root = Analyzer(this).Process(AllBlocks, Entries, nullptr);
+ assert(Root);
+
+ ///
+ /// Relooper post-optimizer
+ ///
+ struct PostOptimizer {
+ RelooperAlgorithm *Parent;
+ std::stack<Shape *> LoopStack;
+
+ PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {}
+
+ void ShapeSwitch(Shape* var,
+ std::function<void (SimpleShape*)> simple,
+ std::function<void (MultipleShape*)> multiple,
+ std::function<void (LoopShape*)> loop) {
+ switch (var->getKind()) {
+ case Shape::SK_Simple: {
+ simple(cast<SimpleShape>(var));
+ break;
+ }
+ case Shape::SK_Multiple: {
+ multiple(cast<MultipleShape>(var));
+ break;
+ }
+ case Shape::SK_Loop: {
+ loop(cast<LoopShape>(var));
+ break;
+ }
+ }
+ }
+
+ // Find the blocks that natural control flow can get us directly to, or
+ // through a multiple that we ignore
+ void FollowNaturalFlow(Shape *S, BlockSet &Out) {
+ ShapeSwitch(S, [&](SimpleShape* Simple) {
+ Out.insert(Simple->Inner);
+ }, [&](MultipleShape* Multiple) {
+ for (const auto &iter : Multiple->InnerMap) {
+ FollowNaturalFlow(iter.second, Out);
+ }
+ FollowNaturalFlow(Multiple->Next, Out);
+ }, [&](LoopShape* Loop) {
+ FollowNaturalFlow(Loop->Inner, Out);
+ });
+ }
+
+ void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) {
+ if (Root->Next) {
+ Root->Natural = Root->Next;
+ FindNaturals(Root->Next, Otherwise);
+ } else {
+ Root->Natural = Otherwise;
+ }
+
+ ShapeSwitch(Root, [](SimpleShape* Simple) {
+ }, [&](MultipleShape* Multiple) {
+ for (const auto &iter : Multiple->InnerMap) {
+ FindNaturals(iter.second, Root->Natural);
+ }
+ }, [&](LoopShape* Loop){
+ FindNaturals(Loop->Inner, Loop->Inner);
+ });
+ }
+
+ // Remove unneeded breaks and continues.
+ // A flow operation is trivially unneeded if the shape we naturally get to
+ // by normal code execution is the same as the flow forces us to.
+ void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr,
+ LoopShape *LastLoop = nullptr,
+ unsigned Depth = 0) {
+ BlockSet NaturalBlocks;
+ FollowNaturalFlow(Natural, NaturalBlocks);
+ Shape *Next = Root;
+ while (Next) {
+ Root = Next;
+ Next = nullptr;
+ ShapeSwitch(
+ Root,
+ [&](SimpleShape* Simple) {
+ if (Simple->Inner->BranchVar)
+ LastLoop =
+ nullptr; // a switch clears out the loop (TODO: only for
+ // breaks, not continue)
+
+ if (Simple->Next) {
+ if (!Simple->Inner->BranchVar &&
+ Simple->Inner->ProcessedBranchesOut.size() == 2 &&
+ Depth < RelooperNestingLimit) {
+ // If there is a next block, we already know at Simple
+ // creation time to make direct branches, and we can do
+ // nothing more in general. But, we try to optimize the
+ // case of a break and a direct: This would normally be
+ // if (break?) { break; } ..
+ // but if we make sure to nest the else, we can save the
+ // break,
+ // if (!break?) { .. }
+ // This is also better because the more canonical nested
+ // form is easier to further optimize later. The
+ // downside is more nesting, which adds to size in builds with
+ // whitespace.
+ // Note that we avoid switches, as it complicates control flow
+ // and is not relevant for the common case we optimize here.
+ bool Found = false;
+ bool Abort = false;
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Block *Target = iter.first;
+ Branch *Details = iter.second.get();
+ if (Details->Type == Branch::Break) {
+ Found = true;
+ if (!contains(NaturalBlocks, Target))
+ Abort = true;
+ } else if (Details->Type != Branch::Direct)
+ Abort = true;
+ }
+ if (Found && !Abort) {
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Branch *Details = iter.second.get();
+ if (Details->Type == Branch::Break) {
+ Details->Type = Branch::Direct;
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor))
+ Multiple->Breaks--;
+ } else {
+ assert(Details->Type == Branch::Direct);
+ Details->Type = Branch::Nested;
+ }
+ }
+ }
+ Depth++; // this optimization increases depth, for us and all
+ // our next chain (i.e., until this call returns)
+ }
+ Next = Simple->Next;
+ } else {
+ // If there is no next then Natural is where we will
+ // go to by doing nothing, so we can potentially optimize some
+ // branches to direct.
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Block *Target = iter.first;
+ Branch *Details = iter.second.get();
+ if (Details->Type != Branch::Direct &&
+ contains(NaturalBlocks,
+ Target)) { // note: cannot handle split blocks
+ Details->Type = Branch::Direct;
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor))
+ Multiple->Breaks--;
+ } else if (Details->Type == Branch::Break && LastLoop &&
+ LastLoop->Natural == Details->Ancestor->Natural) {
+ // it is important to simplify breaks, as simpler breaks
+ // enable other optimizations
+ Details->Labeled = false;
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor))
+ Multiple->Breaks--;
+ }
+ }
+ }
+ }, [&](MultipleShape* Multiple)
+ {
+ for (const auto &iter : Multiple->InnerMap) {
+ RemoveUnneededFlows(iter.second, Multiple->Next,
+ Multiple->Breaks ? nullptr : LastLoop,
+ Depth + 1);
+ }
+ Next = Multiple->Next;
+ }, [&](LoopShape* Loop)
+ {
+ RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1);
+ Next = Loop->Next;
+ });
+ }
+ }
+
+ // After we know which loops exist, we can calculate which need to be
+ // labeled
+ void FindLabeledLoops(Shape *Root) {
+ Shape *Next = Root;
+ while (Next) {
+ Root = Next;
+ Next = nullptr;
+
+ ShapeSwitch(
+ Root,
+ [&](SimpleShape *Simple) {
+ MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next);
+ // If we are fusing a Multiple with a loop into this Simple, then
+ // visit it now
+ if (Fused && Fused->Breaks)
+ LoopStack.push(Fused);
+ if (Simple->Inner->BranchVar)
+ LoopStack.push(nullptr); // a switch means breaks are now useless,
+ // push a dummy
+ if (Fused) {
+ if (Fused->UseSwitch)
+ LoopStack.push(nullptr); // a switch means breaks are now
+ // useless, push a dummy
+ for (const auto &iter : Fused->InnerMap) {
+ FindLabeledLoops(iter.second);
+ }
+ }
+ for (const auto &iter : Simple->Inner->ProcessedBranchesOut) {
+ Branch *Details = iter.second.get();
+ if (Details->Type == Branch::Break ||
+ Details->Type == Branch::Continue) {
+ assert(!LoopStack.empty());
+ if (Details->Ancestor != LoopStack.top() && Details->Labeled) {
+ if (MultipleShape *Multiple =
+ dyn_cast<MultipleShape>(Details->Ancestor)) {
+ Multiple->Labeled = true;
+ } else {
+ LoopShape *Loop = cast<LoopShape>(Details->Ancestor);
+ Loop->Labeled = true;
+ }
+ } else {
+ Details->Labeled = false;
+ }
+ }
+ if (Fused && Fused->UseSwitch)
+ LoopStack.pop();
+ if (Simple->Inner->BranchVar)
+ LoopStack.pop();
+ if (Fused && Fused->Breaks)
+ LoopStack.pop();
+ if (Fused)
+ Next = Fused->Next;
+ else
+ Next = Root->Next;
+ }
+ }
+ , [&](MultipleShape* Multiple) {
+ if (Multiple->Breaks)
+ LoopStack.push(Multiple);
+ for (const auto &iter : Multiple->InnerMap)
+ FindLabeledLoops(iter.second);
+ if (Multiple->Breaks)
+ LoopStack.pop();
+ Next = Root->Next;
+ }
+ , [&](LoopShape* Loop) {
+ LoopStack.push(Loop);
+ FindLabeledLoops(Loop->Inner);
+ LoopStack.pop();
+ Next = Root->Next;
+ });
+ }
+ }
+
+ void Process(Shape * Root) {
+ FindNaturals(Root);
+ RemoveUnneededFlows(Root);
+ FindLabeledLoops(Root);
+ }
+ };
+
+ PostOptimizer(this).Process(Root);
+}
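A hedged illustration, not taken from the patch, of what Calculate produces for a diamond-shaped CFG (an entry branching to two arms that both fall through to a join): the entry becomes a SimpleShape, the two arms form a MultipleShape, and the join is a SimpleShape chained through Next. In structured form that corresponds roughly to the code below; the function names are placeholders for the code attached to each Block.

    static void entryBlock() {}
    static void thenArm() {}
    static void elseArm() {}
    static void joinBlock() {}

    static void diamond(bool Cond) {
      entryBlock();  // SimpleShape for the entry block
      if (Cond)      // MultipleShape covering the two independent arms
        thenArm();
      else
        elseArm();
      joinBlock();   // SimpleShape reached through the Multiple's Next
    }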
diff --git a/contrib/llvm/lib/Target/WebAssembly/Relooper.h b/contrib/llvm/lib/Target/WebAssembly/Relooper.h
new file mode 100644
index 0000000..7c564de
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/Relooper.h
@@ -0,0 +1,186 @@
+//===-- Relooper.h - Top-level interface for WebAssembly ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This defines an optimized C++ implementation of the Relooper
+/// algorithm, originally developed as part of Emscripten, which
+/// generates a structured AST from arbitrary control flow.
+///
+//===-------------------------------------------------------------------===//
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Casting.h"
+
+#include <cassert>
+#include <cstdarg>
+#include <cstdio>
+#include <deque>
+#include <list>
+#include <map>
+#include <memory>
+#include <set>
+
+namespace llvm {
+
+namespace Relooper {
+
+struct Block;
+struct Shape;
+
+///
+/// Info about a branching from one block to another
+///
+struct Branch {
+ enum FlowType {
+ Direct = 0, // We will directly reach the right location through other
+ // means, no need for continue or break
+ Break = 1,
+ Continue = 2,
+    Nested = 3 // This code is directly reached, but we must be careful to
+               // ensure it is nested in an if - it is not reached
+               // unconditionally; other code paths exist alongside it that we
+               // need to make sure do not intertwine
+ };
+ Shape
+ *Ancestor; // If not nullptr, this shape is the relevant one for purposes
+ // of getting to the target block. We break or continue on it
+ Branch::FlowType
+ Type; // If Ancestor is not nullptr, this says whether to break or
+ // continue
+ bool Labeled; // If a break or continue, whether we need to use a label
+ const char *Condition; // The condition for which we branch. For example,
+ // "my_var == 1". Conditions are checked one by one.
+ // One of the conditions should have nullptr as the
+ // condition, in which case it is the default
+ // FIXME: move from char* to LLVM data structures
+ const char *Code; // If provided, code that is run right before the branch is
+ // taken. This is useful for phis
+ // FIXME: move from char* to LLVM data structures
+
+ Branch(const char *ConditionInit, const char *CodeInit = nullptr);
+ ~Branch();
+};
+
+typedef SetVector<Block *> BlockSet;
+typedef MapVector<Block *, Branch *> BlockBranchMap;
+typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap;
+
+///
+/// Represents a basic block of code - some instructions that end with a
+/// control flow modifier (a branch, return or throw).
+///
+struct Block {
+ // Branches become processed after we finish the shape relevant to them. For
+ // example, when we recreate a loop, branches to the loop start become
+ // continues and are now processed. When we calculate what shape to generate
+ // from a set of blocks, we ignore processed branches. Blocks own the Branch
+ // objects they use, and destroy them when done.
+ OwningBlockBranchMap BranchesOut;
+ BlockSet BranchesIn;
+ OwningBlockBranchMap ProcessedBranchesOut;
+ BlockSet ProcessedBranchesIn;
+ Shape *Parent; // The shape we are directly inside
+ int Id; // A unique identifier, defined when added to relooper. Note that this
+ // uniquely identifies a *logical* block - if we split it, the two
+ // instances have the same content *and* the same Id
+ const char *Code; // The string representation of the code in this block.
+ // Owning pointer (we copy the input)
+ // FIXME: move from char* to LLVM data structures
+ const char *BranchVar; // A variable whose value determines where we go; if
+ // this is not nullptr, emit a switch on that variable
+ // FIXME: move from char* to LLVM data structures
+ bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching
+ // us requires setting the label variable
+
+ Block(const char *CodeInit, const char *BranchVarInit);
+ ~Block();
+
+ void AddBranchTo(Block *Target, const char *Condition,
+ const char *Code = nullptr);
+};
+
+///
+/// Represents a structured control flow shape
+///
+struct Shape {
+  int Id; // A unique identifier. Used to identify loops; labels are Lx, where
+          // x is the Id. Defined when added to relooper
+ Shape *Next; // The shape that will appear in the code right after this one
+ Shape *Natural; // The shape that control flow gets to naturally (if there is
+ // Next, then this is Next)
+
+ /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
+ enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop };
+
+private:
+ ShapeKind Kind;
+
+public:
+ ShapeKind getKind() const { return Kind; }
+
+ Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {}
+};
+
+///
+/// Simple: No control flow at all, just instructions.
+///
+struct SimpleShape : public Shape {
+ Block *Inner;
+
+ SimpleShape() : Shape(SK_Simple), Inner(nullptr) {}
+
+ static bool classof(const Shape *S) { return S->getKind() == SK_Simple; }
+};
+
+///
+/// A shape that may be implemented with a labeled loop.
+///
+struct LabeledShape : public Shape {
+ bool Labeled; // If we have a loop, whether it needs to be labeled
+
+ LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {}
+};
+
+// Blocks with the same id were split and are identical, so we just care about
+// ids in Multiple entries
+typedef std::map<int, Shape *> IdShapeMap;
+
+///
+/// Multiple: A shape with more than one entry. If the next block to be
+/// entered is among them, we run it and continue to the next shape;
+/// otherwise we continue immediately to the next shape.
+///
+struct MultipleShape : public LabeledShape {
+  IdShapeMap InnerMap; // entry block ID -> shape
+  int Breaks; // If we have branches on us, we need a loop (or a switch). This
+              // is a counter of such requirements; if we optimize it down to
+              // 0, the loop is unneeded
+ bool UseSwitch; // Whether to switch on label as opposed to an if-else chain
+
+ MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {}
+
+ static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; }
+};
+
+///
+/// Loop: An infinite loop.
+///
+struct LoopShape : public LabeledShape {
+ Shape *Inner;
+
+ LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {}
+
+ static bool classof(const Shape *S) { return S->getKind() == SK_Loop; }
+};
+
+} // namespace Relooper
+
+} // namespace llvm
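
A note on the Shape hierarchy above: it implements LLVM-style RTTI by hand, with a ShapeKind tag set in each constructor and a per-subclass classof predicate that dyn_cast<> keys on. Below is a minimal self-contained sketch of the same pattern, using a hand-rolled dyn_cast_sketch helper in place of llvm/Support/Casting.h; the sketch types are illustrative stand-ins, not the Relooper classes themselves.

#include <cassert>
#include <iostream>

// Kind-tag base class, mirroring Shape::ShapeKind and Shape::getKind().
struct ShapeBase {
  enum Kind { SK_Simple, SK_Multiple, SK_Loop };
  explicit ShapeBase(Kind K) : TheKind(K) {}
  Kind getKind() const { return TheKind; }
private:
  Kind TheKind;
};

struct SimpleSketch : ShapeBase {
  SimpleSketch() : ShapeBase(SK_Simple) {}
  static bool classof(const ShapeBase *S) { return S->getKind() == SK_Simple; }
};

struct LoopSketch : ShapeBase {
  LoopSketch() : ShapeBase(SK_Loop) {}
  static bool classof(const ShapeBase *S) { return S->getKind() == SK_Loop; }
};

// dyn_cast-like helper: returns nullptr when the kind tag doesn't match.
template <typename To> To *dyn_cast_sketch(ShapeBase *S) {
  return To::classof(S) ? static_cast<To *>(S) : nullptr;
}

int main() {
  LoopSketch L;
  ShapeBase *S = &L;
  assert(dyn_cast_sketch<SimpleSketch>(S) == nullptr); // wrong kind
  assert(dyn_cast_sketch<LoopSketch>(S) != nullptr);   // right kind
  std::cout << "kind = " << S->getKind() << '\n';      // prints "kind = 2"
}
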
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
index 3ff19d4..e972da5 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -23,8 +23,22 @@ namespace llvm {
class WebAssemblyTargetMachine;
class FunctionPass;
+FunctionPass *createWebAssemblyOptimizeReturned();
+
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+FunctionPass *createWebAssemblyArgumentMove();
+
+FunctionPass *createWebAssemblyStoreResults();
+FunctionPass *createWebAssemblyRegStackify();
+FunctionPass *createWebAssemblyRegColoring();
+FunctionPass *createWebAssemblyPEI();
+FunctionPass *createWebAssemblyCFGStackify();
+FunctionPass *createWebAssemblyLowerBrUnless();
+FunctionPass *createWebAssemblyRegNumbering();
+FunctionPass *createWebAssemblyPeephole();
+
+FunctionPass *createWebAssemblyRelooper();
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
index a123bf6..551ad93 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -6,10 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This is a target description file for the WebAssembly architecture, which is
-// also known as "wasm".
-//
+///
+/// \file
+/// \brief This is a target description file for the WebAssembly architecture,
+/// which is also known as "wasm".
+///
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -50,6 +51,9 @@ def WebAssemblyInstrInfo : InstrInfo;
// Minimal Viable Product.
def : ProcessorModel<"mvp", NoSchedModel, []>;
+// Generic processor: latest stable version.
+def : ProcessorModel<"generic", NoSchedModel, []>;
+
// Latest and greatest experimental version of WebAssembly. Bugs included!
def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
new file mode 100644
index 0000000..3893c40
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -0,0 +1,110 @@
+//===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file moves ARGUMENT instructions after ScheduleDAG scheduling.
+///
+/// Arguments are really live-in registers. However, since we use virtual
+/// registers and LLVM doesn't support live-in virtual registers, we're
+/// currently making do with ARGUMENT instructions, which are placed at the
+/// top of the entry block. The trick is to get them to *stay* at the top of
+/// the entry block.
+///
+/// The ARGUMENTS physical register keeps these instructions pinned in place
+/// during liveness-aware CodeGen passes; however, one thing that does not
+/// respect this is the ScheduleDAG scheduler. This pass is therefore run
+/// immediately after it.
+///
+/// This is all hopefully a temporary solution until we find a better solution
+/// for describing the live-in nature of arguments.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-argument-move"
+
+namespace {
+class WebAssemblyArgumentMove final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyArgumentMove() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "WebAssembly Argument Move";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end anonymous namespace
+
+char WebAssemblyArgumentMove::ID = 0;
+FunctionPass *llvm::createWebAssemblyArgumentMove() {
+ return new WebAssemblyArgumentMove();
+}
+
+/// Test whether the given instruction is an ARGUMENT.
+static bool IsArgument(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Argument Move **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ bool Changed = false;
+ MachineBasicBlock &EntryMBB = MF.front();
+ MachineBasicBlock::iterator InsertPt = EntryMBB.end();
+
+ // Look for the first NonArg instruction.
+ for (auto MII = EntryMBB.begin(), MIE = EntryMBB.end(); MII != MIE; ++MII) {
+ MachineInstr *MI = MII;
+ if (!IsArgument(MI)) {
+ InsertPt = MII;
+ break;
+ }
+ }
+
+ // Now move any argument instructions later in the block
+ // to before our first NonArg instruction.
+ for (auto I = InsertPt, E = EntryMBB.end(); I != E; ++I) {
+ MachineInstr *MI = I;
+ if (IsArgument(MI)) {
+ EntryMBB.insert(InsertPt, MI->removeFromParent());
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
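
The pass above works in two phases: scan the entry block for the first non-ARGUMENT instruction, then hoist any ARGUMENT instructions found after that point to just before it. Below is a self-contained sketch of the same reordering over a std::list, using splice with a saved successor iterator; Instr, IsArg, and hoistArguments are hypothetical stand-ins for MachineInstr, IsArgument, and the pass body.

#include <iostream>
#include <iterator>
#include <list>
#include <string>

struct Instr {
  bool IsArg;
  std::string Name;
};

// Hoist any stragglers tagged IsArg up to the leading argument region,
// preserving their relative order, mirroring WebAssemblyArgumentMove's scan.
static bool hoistArguments(std::list<Instr> &Block) {
  bool Changed = false;
  auto InsertPt = Block.begin();
  while (InsertPt != Block.end() && InsertPt->IsArg)
    ++InsertPt; // skip the leading run of arguments
  for (auto I = InsertPt; I != Block.end();) {
    auto Next = std::next(I);
    if (I->IsArg) {
      Block.splice(InsertPt, Block, I); // move before the first non-argument
      Changed = true;
    }
    I = Next;
  }
  return Changed;
}

int main() {
  std::list<Instr> Block = {
      {true, "arg0"}, {false, "add"}, {true, "arg1"}, {false, "ret"}};
  hoistArguments(Block);
  for (const Instr &I : Block)
    std::cout << I.Name << ' '; // prints: arg0 arg1 add ret
  std::cout << '\n';
}
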
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
new file mode 100644
index 0000000..0d2b4d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -0,0 +1,285 @@
+//===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains a printer that converts from our internal
+/// representation of machine-dependent LLVM code to the WebAssembly assembly
+/// language.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMCInstLower.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyRegisterInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+
+class WebAssemblyAsmPrinter final : public AsmPrinter {
+ const MachineRegisterInfo *MRI;
+ const WebAssemblyFunctionInfo *MFI;
+
+public:
+ WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {}
+
+private:
+ const char *getPassName() const override {
+ return "WebAssembly Assembly Printer";
+ }
+
+ //===------------------------------------------------------------------===//
+ // MachineFunctionPass Implementation.
+ //===------------------------------------------------------------------===//
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ MRI = &MF.getRegInfo();
+ MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+ return AsmPrinter::runOnMachineFunction(MF);
+ }
+
+ //===------------------------------------------------------------------===//
+ // AsmPrinter Implementation.
+ //===------------------------------------------------------------------===//
+
+ void EmitJumpTableInfo() override;
+ void EmitConstantPool() override;
+ void EmitFunctionBodyStart() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+
+ MVT getRegType(unsigned RegNo) const;
+ const char *toString(MVT VT) const;
+ std::string regToString(const MachineOperand &MO);
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Helpers.
+//===----------------------------------------------------------------------===//
+
+MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
+ const TargetRegisterClass *TRC =
+ TargetRegisterInfo::isVirtualRegister(RegNo) ?
+ MRI->getRegClass(RegNo) :
+ MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
+ for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
+ if (TRC->hasType(T))
+ return T;
+ DEBUG(errs() << "Unknown type for register number: " << RegNo);
+ llvm_unreachable("Unknown register type");
+ return MVT::Other;
+}
+
+std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(RegNo) &&
+ "Unlowered physical register encountered during assembly printing");
+ assert(!MFI->isVRegStackified(RegNo));
+ unsigned WAReg = MFI->getWAReg(RegNo);
+ assert(WAReg != WebAssemblyFunctionInfo::UnusedReg);
+ return '$' + utostr(WAReg);
+}
+
+const char *WebAssemblyAsmPrinter::toString(MVT VT) const {
+ return WebAssembly::TypeToString(VT);
+}
+
+//===----------------------------------------------------------------------===//
+// WebAssemblyAsmPrinter Implementation.
+//===----------------------------------------------------------------------===//
+
+void WebAssemblyAsmPrinter::EmitConstantPool() {
+ assert(MF->getConstantPool()->getConstants().empty() &&
+ "WebAssembly disables constant pools");
+}
+
+void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
+ // Nothing to do; jump tables are incorporated into the instruction stream.
+}
+
+static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
+ Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
+ const DataLayout &DL(F.getParent()->getDataLayout());
+ const WebAssemblyTargetLowering &TLI =
+ *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
+ SmallVector<EVT, 4> VTs;
+ ComputeValueVTs(TLI, DL, Ty, VTs);
+
+ for (EVT VT : VTs) {
+ unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ ValueVTs.push_back(RegisterVT);
+ }
+}
+
+void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
+ if (!MFI->getParams().empty()) {
+ MCInst Param;
+ Param.setOpcode(WebAssembly::PARAM);
+ for (MVT VT : MFI->getParams())
+ Param.addOperand(MCOperand::createImm(VT.SimpleTy));
+ EmitToStreamer(*OutStreamer, Param);
+ }
+
+ SmallVector<MVT, 4> ResultVTs;
+ const Function &F(*MF->getFunction());
+ ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
+ // If the return type needs to be legalized it will get converted into
+ // passing a pointer.
+ if (ResultVTs.size() == 1) {
+ MCInst Result;
+ Result.setOpcode(WebAssembly::RESULT);
+ Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy));
+ EmitToStreamer(*OutStreamer, Result);
+ }
+
+ bool AnyWARegs = false;
+ MCInst Local;
+ Local.setOpcode(WebAssembly::LOCAL);
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
+ unsigned WAReg = MFI->getWAReg(VReg);
+ // Don't declare unused registers.
+ if (WAReg == WebAssemblyFunctionInfo::UnusedReg)
+ continue;
+ // Don't redeclare parameters.
+ if (WAReg < MFI->getParams().size())
+ continue;
+ // Don't declare stackified registers.
+ if (int(WAReg) < 0)
+ continue;
+ Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy));
+ AnyWARegs = true;
+ }
+ auto &PhysRegs = MFI->getPhysRegs();
+ for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) {
+ if (PhysRegs[PReg] == -1U)
+ continue;
+ Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy));
+ AnyWARegs = true;
+ }
+ if (AnyWARegs)
+ EmitToStreamer(*OutStreamer, Local);
+
+ AsmPrinter::EmitFunctionBodyStart();
+}
+
+void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
+
+ switch (MI->getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ // These represent values which are live into the function entry, so there's
+ // no instruction to emit.
+ break;
+ case WebAssembly::LOOP_END:
+ // This is a no-op which just exists to tell AsmPrinter.cpp that there's a
+ // fallthrough which nevertheless requires a label for the destination here.
+ break;
+ default: {
+ WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ break;
+ }
+ }
+}
+
+bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ if (AsmVariant != 0)
+ report_fatal_error("There are no defined alternate asm variants");
+
+ // First try the generic code, which knows about modifiers like 'c' and 'n'.
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS))
+ return false;
+
+ if (!ExtraCode) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ OS << MO.getImm();
+ return false;
+ case MachineOperand::MO_Register:
+ OS << regToString(MO);
+ return false;
+ case MachineOperand::MO_GlobalAddress:
+ getSymbol(MO.getGlobal())->print(OS, MAI);
+ printOffset(MO.getOffset(), OS);
+ return false;
+ case MachineOperand::MO_ExternalSymbol:
+ GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI);
+ printOffset(MO.getOffset(), OS);
+ return false;
+ case MachineOperand::MO_MachineBasicBlock:
+ MO.getMBB()->getSymbol()->print(OS, MAI);
+ return false;
+ default:
+ break;
+ }
+ }
+
+ return true;
+}
+
+bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ if (AsmVariant != 0)
+ report_fatal_error("There are no defined alternate asm variants");
+
+ if (!ExtraCode) {
+ // TODO: For now, we just hard-code 0 as the constant offset; teach
+ // SelectInlineAsmMemoryOperand how to do address mode matching.
+ OS << "0(" + regToString(MI->getOperand(OpNo)) + ')';
+ return false;
+ }
+
+ return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeWebAssemblyAsmPrinter() {
+ RegisterAsmPrinter<WebAssemblyAsmPrinter> X(TheWebAssemblyTarget32);
+ RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(TheWebAssemblyTarget64);
+}
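
EmitFunctionBodyStart above declares parameters first, then a single legalized result if there is one, then the remaining locals, skipping registers that are unused, already counted among the parameters, or stackified. The sketch below models that filtering with plain strings in place of MVTs; printDecls and its textual output are illustrative, not the exact directives the streamer emits.

#include <iostream>
#include <string>
#include <vector>

// Declaration order used by EmitFunctionBodyStart: parameters, then a single
// result (if the return type legalized to one register), then locals.
static void printDecls(const std::vector<std::string> &Params,
                       const std::vector<std::string> &Results,
                       const std::vector<int> &WARegs, // <0 == unused/stackified
                       const std::vector<std::string> &RegTypes) {
  if (!Params.empty()) {
    std::cout << ".param";
    for (const std::string &P : Params)
      std::cout << ' ' << P;
    std::cout << '\n';
  }
  if (Results.size() == 1)
    std::cout << ".result " << Results.front() << '\n';
  std::string Local = ".local";
  bool AnyLocals = false;
  for (size_t I = 0; I != WARegs.size(); ++I) {
    if (WARegs[I] < 0)
      continue; // unused or stackified register: don't declare it
    if ((size_t)WARegs[I] < Params.size())
      continue; // already declared as a parameter
    Local += ' ' + RegTypes[I];
    AnyLocals = true;
  }
  if (AnyLocals)
    std::cout << Local << '\n';
}

int main() {
  // Two i32 params, one i32 result, one extra f64 local (register 2);
  // register 3 is stackified and therefore skipped.
  printDecls({"i32", "i32"}, {"i32"}, {0, 1, 2, -1},
             {"i32", "i32", "f64", "f64"});
}
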
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
new file mode 100644
index 0000000..e9671ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -0,0 +1,468 @@
+//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a CFG stacking pass.
+///
+/// This pass reorders the blocks in a function to put them into a reverse
+/// post-order [0], with special care to keep the order as similar as possible
+/// to the original order, and to keep loops contiguous even in the case of
+/// split backedges.
+///
+/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since
+/// scope boundaries serve as the labels for WebAssembly's control transfers.
+///
+/// This is sufficient to convert arbitrary CFGs into a form that works on
+/// WebAssembly, provided that all loops are single-entry.
+///
+/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-cfg-stackify"
+
+namespace {
+class WebAssemblyCFGStackify final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly CFG Stackify";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyCFGStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyCFGStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyCFGStackify() {
+ return new WebAssemblyCFGStackify();
+}
+
+static void EliminateMultipleEntryLoops(MachineFunction &MF,
+ const MachineLoopInfo &MLI) {
+ SmallPtrSet<MachineBasicBlock *, 8> InSet;
+ for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF);
+ I != E; ++I) {
+ const std::vector<MachineBasicBlock *> &CurrentSCC = *I;
+
+ // Skip trivial SCCs.
+ if (CurrentSCC.size() == 1)
+ continue;
+
+ InSet.insert(CurrentSCC.begin(), CurrentSCC.end());
+ MachineBasicBlock *Header = nullptr;
+ for (MachineBasicBlock *MBB : CurrentSCC) {
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (InSet.count(Pred))
+ continue;
+ if (!Header) {
+ Header = MBB;
+ break;
+ }
+ // TODO: Implement multiple-entry loops.
+ report_fatal_error("multiple-entry loops are not supported yet");
+ }
+ }
+ assert(MLI.isLoopHeader(Header));
+
+ InSet.clear();
+ }
+}
+
+namespace {
+/// Post-order traversal stack entry.
+struct POStackEntry {
+ MachineBasicBlock *MBB;
+ SmallVector<MachineBasicBlock *, 0> Succs;
+
+ POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
+ const MachineLoopInfo &MLI);
+};
+} // end anonymous namespace
+
+static bool LoopContains(const MachineLoop *Loop,
+ const MachineBasicBlock *MBB) {
+ return Loop ? Loop->contains(MBB) : true;
+}
+
+POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
+ const MachineLoopInfo &MLI)
+ : MBB(MBB), Succs(MBB->successors()) {
+ // RPO is not a unique form, since at every basic block with multiple
+ // successors, the DFS has to pick which order to visit the successors in.
+ // Sort them strategically (see below).
+ MachineLoop *Loop = MLI.getLoopFor(MBB);
+ MachineFunction::iterator Next = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next;
+ std::stable_sort(
+ Succs.begin(), Succs.end(),
+ [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) {
+ if (A == B)
+ return false;
+
+ // Keep loops contiguous by preferring the block that's in the same
+ // loop.
+ bool LoopContainsA = LoopContains(Loop, A);
+ bool LoopContainsB = LoopContains(Loop, B);
+ if (LoopContainsA && !LoopContainsB)
+ return true;
+ if (!LoopContainsA && LoopContainsB)
+ return false;
+
+ // Minimize perturbation by preferring the block which is the immediate
+ // layout successor.
+ if (A == LayoutSucc)
+ return true;
+ if (B == LayoutSucc)
+ return false;
+
+ // TODO: More sophisticated orderings may be profitable here.
+
+ return false;
+ });
+}
+
+/// Return the "bottom" block of a loop. This differs from
+/// MachineLoop::getBottomBlock in that it works even if the loop is
+/// discontiguous.
+static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) {
+ MachineBasicBlock *Bottom = Loop->getHeader();
+ for (MachineBasicBlock *MBB : Loop->blocks())
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ return Bottom;
+}
+
+/// Sort the blocks in RPO, taking special care to make sure that loops are
+/// contiguous even in the case of split backedges.
+///
+/// TODO: Determine whether RPO is actually worthwhile, or whether we should
+/// move to just a stable-topological-sort-based approach that would preserve
+/// more of the original order.
+static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
+ // Note that we do our own RPO rather than using
+ // "llvm/ADT/PostOrderIterator.h" because we want control over the order that
+ // successors are visited in (see above). Also, we can sort the blocks in the
+ // MachineFunction as we go.
+ SmallPtrSet<MachineBasicBlock *, 16> Visited;
+ SmallVector<POStackEntry, 16> Stack;
+
+ MachineBasicBlock *EntryBlock = &*MF.begin();
+ Visited.insert(EntryBlock);
+ Stack.push_back(POStackEntry(EntryBlock, MF, MLI));
+
+ for (;;) {
+ POStackEntry &Entry = Stack.back();
+ SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs;
+ if (!Succs.empty()) {
+ MachineBasicBlock *Succ = Succs.pop_back_val();
+ if (Visited.insert(Succ).second)
+ Stack.push_back(POStackEntry(Succ, MF, MLI));
+ continue;
+ }
+
+ // Put the block in its position in the MachineFunction.
+ MachineBasicBlock &MBB = *Entry.MBB;
+ MBB.moveBefore(&*MF.begin());
+
+ // Branch instructions may utilize a fallthrough, so update them if a
+ // fallthrough has been added or removed.
+ if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() &&
+ !MBB.back().isBarrier())
+ report_fatal_error(
+ "Non-branch terminator with fallthrough cannot yet be rewritten");
+ if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch())
+ MBB.updateTerminator();
+
+ Stack.pop_back();
+ if (Stack.empty())
+ break;
+ }
+
+ // Now that we've sorted the blocks in RPO, renumber them.
+ MF.RenumberBlocks();
+
+#ifndef NDEBUG
+ SmallSetVector<MachineLoop *, 8> OnStack;
+
+ // Insert a sentinel representing the degenerate loop that starts at the
+ // function entry block and includes the entire function as a "loop" that
+ // executes once.
+ OnStack.insert(nullptr);
+
+ for (auto &MBB : MF) {
+ assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+
+ MachineLoop *Loop = MLI.getLoopFor(&MBB);
+ if (Loop && &MBB == Loop->getHeader()) {
+ // Loop header. The loop predecessor should be sorted above, and the other
+ // predecessors should be backedges below.
+ for (auto Pred : MBB.predecessors())
+ assert(
+ (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
+ "Loop header predecessors must be loop predecessors or backedges");
+ assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
+ } else {
+ // Not a loop header. All predecessors should be sorted above.
+ for (auto Pred : MBB.predecessors())
+ assert(Pred->getNumber() < MBB.getNumber() &&
+ "Non-loop-header predecessors should be topologically sorted");
+ assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
+ "Blocks must be nested in their loops");
+ }
+ while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
+ OnStack.pop_back();
+ }
+ assert(OnStack.pop_back_val() == nullptr &&
+ "The function entry block shouldn't actually be a loop header");
+ assert(OnStack.empty() &&
+ "Control flow stack pushes and pops should be balanced.");
+#endif
+}
+
+/// Test whether Pred has any terminators explicitly branching to MBB, as
+/// opposed to falling through. Note that it's possible (e.g. in unoptimized
+/// code) for a branch instruction to both branch to a block and fallthrough
+/// to it, so we check the actual branch operands to see if there are any
+/// explicit mentions.
+static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) {
+ for (MachineInstr &MI : Pred->terminators())
+ for (MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB() == MBB)
+ return true;
+ return false;
+}
+
+/// Insert a BLOCK marker for branches to MBB (if needed).
+static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
+ SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+ const WebAssemblyInstrInfo &TII,
+ const MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT) {
+ // First compute the nearest common dominator of all forward non-fallthrough
+ // predecessors so that we minimize the time that the BLOCK is on the stack,
+ // which reduces overall stack height.
+ MachineBasicBlock *Header = nullptr;
+ bool IsBranchedTo = false;
+ int MBBNumber = MBB.getNumber();
+ for (MachineBasicBlock *Pred : MBB.predecessors())
+ if (Pred->getNumber() < MBBNumber) {
+ Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
+ if (ExplicitlyBranchesTo(Pred, &MBB))
+ IsBranchedTo = true;
+ }
+ if (!Header)
+ return;
+ if (!IsBranchedTo)
+ return;
+
+ assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
+ MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB));
+
+ // If the nearest common dominator is inside a more deeply nested context,
+ // walk out to the nearest scope which isn't more deeply nested.
+ for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) {
+ if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
+ if (ScopeTop->getNumber() > Header->getNumber()) {
+ // Skip over an intervening scope.
+ I = next(MachineFunction::iterator(ScopeTop));
+ } else {
+ // We found a scope level at an appropriate depth.
+ Header = ScopeTop;
+ break;
+ }
+ }
+ }
+
+ // If there's a loop which ends just before MBB which contains Header, we can
+ // reuse its label instead of inserting a new BLOCK.
+ for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred);
+ Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop())
+ if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header))
+ return;
+
+ // Decide where in Header to put the BLOCK.
+ MachineBasicBlock::iterator InsertPos;
+ MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
+ if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) {
+ // Header is the header of a loop that does not lexically contain MBB, so
+ // the BLOCK needs to be above the LOOP.
+ InsertPos = Header->begin();
+ } else {
+ // Otherwise, insert the BLOCK as late in Header as we can, but before the
+ // beginning of the local expression tree and any nested BLOCKs.
+ InsertPos = Header->getFirstTerminator();
+ while (InsertPos != Header->begin() &&
+ prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) &&
+ prev(InsertPos)->getOpcode() != WebAssembly::LOOP)
+ --InsertPos;
+ }
+
+ // Add the BLOCK.
+ BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK))
+ .addMBB(&MBB);
+
+ // Track the farthest-spanning scope that ends at this point.
+ int Number = MBB.getNumber();
+ if (!ScopeTops[Number] ||
+ ScopeTops[Number]->getNumber() > Header->getNumber())
+ ScopeTops[Number] = Header;
+}
+
+/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
+static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
+ SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+ const WebAssemblyInstrInfo &TII,
+ const MachineLoopInfo &MLI) {
+ MachineLoop *Loop = MLI.getLoopFor(&MBB);
+ if (!Loop || Loop->getHeader() != &MBB)
+ return;
+
+ // The operand of a LOOP is the first block after the loop. If the loop is the
+ // bottom of the function, insert a dummy block at the end.
+ MachineBasicBlock *Bottom = LoopBottom(Loop);
+ auto Iter = next(MachineFunction::iterator(Bottom));
+ if (Iter == MF.end()) {
+ MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
+ // Give it a fake predecessor so that AsmPrinter prints its label.
+ Label->addSuccessor(Label);
+ MF.push_back(Label);
+ Iter = next(MachineFunction::iterator(Bottom));
+ }
+ MachineBasicBlock *AfterLoop = &*Iter;
+ BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP))
+ .addMBB(AfterLoop);
+
+ // Emit a special no-op telling the asm printer that we need a label to close
+ // the loop scope, even though the destination is only reachable by
+ // fallthrough.
+ if (!Bottom->back().isBarrier())
+ BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END));
+
+ assert((!ScopeTops[AfterLoop->getNumber()] ||
+ ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
+ "With RPO we should visit the outer-most loop for a block first.");
+ if (!ScopeTops[AfterLoop->getNumber()])
+ ScopeTops[AfterLoop->getNumber()] = &MBB;
+}
+
+/// Insert LOOP and BLOCK markers at appropriate places.
+static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
+                         const WebAssemblyInstrInfo &TII,
+                         MachineDominatorTree &MDT) {
+  // For each block whose label represents the end of a scope, record the block
+  // which holds the beginning of the scope. This will allow us to quickly skip
+  // over scoped regions when walking blocks. We allocate one more than the
+  // number of blocks in the function to accommodate the possible fake block
+  // we may insert at the end.
+ SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
+
+ for (auto &MBB : MF) {
+ // Place the LOOP for MBB if MBB is the header of a loop.
+ PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI);
+
+ // Place the BLOCK for MBB if MBB is branched to from above.
+ PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
+ }
+}
+
+#ifndef NDEBUG
+static bool
+IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack,
+ const MachineBasicBlock *MBB) {
+ for (const auto &Pair : Stack)
+ if (Pair.first == MBB)
+ return true;
+ return false;
+}
+#endif
+
+bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** CFG Stackifying **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+ // RPO sorting needs all loops to be single-entry.
+ EliminateMultipleEntryLoops(MF, MLI);
+
+ // Sort the blocks in RPO, with contiguous loops.
+ SortBlocks(MF, MLI);
+
+ // Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
+ PlaceMarkers(MF, MLI, TII, MDT);
+
+#ifndef NDEBUG
+ // Verify that block and loop beginnings and endings are in LIFO order, and
+ // that all references to blocks are to blocks on the stack at the point of
+ // the reference.
+ SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack;
+ for (auto &MBB : MF) {
+ while (!Stack.empty() && Stack.back().first == &MBB)
+ if (Stack.back().second) {
+ assert(Stack.size() >= 2);
+ Stack.pop_back();
+ Stack.pop_back();
+ } else {
+ assert(Stack.size() >= 1);
+ Stack.pop_back();
+ }
+ for (auto &MI : MBB)
+ switch (MI.getOpcode()) {
+ case WebAssembly::LOOP:
+ Stack.push_back(std::make_pair(&MBB, false));
+ Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true));
+ break;
+ case WebAssembly::BLOCK:
+ Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false));
+ break;
+ default:
+ // Verify that all referenced blocks are in scope. A reference to a
+ // block with a negative number is invalid, but can happen with inline
+ // asm, so we shouldn't assert on it, but instead let CodeGen properly
+ // fail on it.
+ for (const MachineOperand &MO : MI.explicit_operands())
+ if (MO.isMBB() && MO.getMBB()->getNumber() >= 0)
+ assert(IsOnStack(Stack, MO.getMBB()));
+ break;
+ }
+ }
+ assert(Stack.empty());
+#endif
+
+ return true;
+}
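
SortBlocks above computes a reverse post-order with an explicit DFS stack so that it can control the order in which successors are visited (preferring same-loop blocks and the layout successor) and move blocks into place as it goes. The sketch below shows just the core traversal over a small adjacency-list graph; it omits the loop-contiguity and layout biases and the in-place block reordering.

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

// Iterative DFS producing a reverse post-order, in the spirit of SortBlocks.
// Successors are taken from the back of each pending list, so callers can
// order them to express a visit preference.
static std::vector<int>
reversePostOrder(const std::vector<std::vector<int>> &Succs, int Entry) {
  std::vector<bool> Visited(Succs.size(), false);
  std::vector<std::pair<int, std::vector<int>>> Stack; // (block, pending succs)
  std::vector<int> PostOrder;
  Visited[Entry] = true;
  Stack.push_back({Entry, Succs[Entry]});
  while (!Stack.empty()) {
    auto &Top = Stack.back();
    if (!Top.second.empty()) {
      int S = Top.second.back();
      Top.second.pop_back();
      if (!Visited[S]) {
        Visited[S] = true;
        Stack.push_back({S, Succs[S]});
      }
      continue;
    }
    PostOrder.push_back(Top.first);
    Stack.pop_back();
  }
  std::reverse(PostOrder.begin(), PostOrder.end());
  return PostOrder;
}

int main() {
  // 0 -> 1 -> 2 -> 1 (backedge), 2 -> 3
  std::vector<std::vector<int>> Succs = {{1}, {2}, {1, 3}, {}};
  for (int B : reversePostOrder(Succs, 0))
    std::cout << B << ' '; // prints: 0 1 2 3
  std::cout << '\n';
}
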
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
new file mode 100644
index 0000000..1b761b1
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -0,0 +1,81 @@
+//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific support for the FastISel
+/// class. Some of the target-specific code is generated by tablegen in the file
+/// WebAssemblyGenFastISel.inc, which is #included here.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-fastisel"
+
+namespace {
+
+class WebAssemblyFastISel final : public FastISel {
+ /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+ /// right decision when generating code for different targets.
+ const WebAssemblySubtarget *Subtarget;
+ LLVMContext *Context;
+
+ // Call handling routines.
+private:
+public:
+ // Backend specific FastISel code.
+ WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo)
+ : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
+ Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>();
+ Context = &FuncInfo.Fn->getContext();
+ }
+
+ bool fastSelectInstruction(const Instruction *I) override;
+
+#include "WebAssemblyGenFastISel.inc"
+};
+
+} // end anonymous namespace
+
+bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default:
+ break;
+ // TODO: add fast-isel selection cases here...
+ }
+
+ // Fall back to target-independent instruction selection.
+ return selectOperator(I, I->getOpcode());
+}
+
+FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) {
+ return new WebAssemblyFastISel(FuncInfo, LibInfo);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index e4ca82e..0eefd57 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -35,11 +35,20 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-frame-info"
// TODO: Implement a red zone?
+// TODO: wasm64
+// TODO: Prolog/epilog should be stackified too. This pass runs after register
+// stackification, so we'll have to do it manually.
+// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions
/// Return true if the specified function should have a dedicated frame pointer
/// register.
bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
- llvm_unreachable("TODO: implement hasFP");
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const auto *RegInfo =
+ MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ RegInfo->needsStackRealignment(MF);
}
/// Under normal circumstances, when a frame pointer is not required, we reserve
@@ -52,23 +61,115 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+
+/// Adjust the stack pointer by a constant amount.
+static void adjustStackPointer(unsigned StackSize, bool AdjustUp,
+                               MachineFunction &MF, MachineBasicBlock &MBB,
+                               const TargetInstrInfo *TII,
+                               MachineBasicBlock::iterator InsertPt,
+                               const DebugLoc &DL) {
+  auto &MRI = MF.getRegInfo();
+  unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg)
+      .addExternalSymbol(SPSymbol);
+  // This MachinePointerInfo should reference __stack_pointer as well but
+  // doesn't because MachinePointerInfo() takes a GV which we don't have for
+  // __stack_pointer. TODO: check if PseudoSourceValue::ExternalSymbolCallEntry
+  // is appropriate instead. (likewise for EmitEpilogue below)
+ auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOLoad, 4, 4);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
+ .addImm(0)
+ .addReg(SPReg)
+ .addMemOperand(LoadMMO);
+ // Add/Subtract the frame size
+ unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(StackSize);
+ BuildMI(MBB, InsertPt, DL,
+ TII->get(AdjustUp ? WebAssembly::ADD_I32 : WebAssembly::SUB_I32),
+ WebAssembly::SP32)
+ .addReg(SPReg)
+ .addReg(OffsetReg);
+ // The SP32 register now has the new stacktop. Also write it back to memory.
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addExternalSymbol(SPSymbol);
+ auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, 4, 4);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addReg(WebAssembly::SP32)
+ .addMemOperand(MMO);
+}
+
void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr");
+ const auto *TII =
+ static_cast<const WebAssemblyInstrInfo*>(MF.getSubtarget().getInstrInfo());
+ DebugLoc DL = I->getDebugLoc();
+ unsigned Opc = I->getOpcode();
+ bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
+ unsigned Amount = I->getOperand(0).getImm();
+ if (Amount)
+ adjustStackPointer(Amount, IsDestroy, MF, MBB,
+ TII, I, DL);
+ MBB.erase(I);
}
void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- llvm_unreachable("TODO: implement emitPrologue");
+ // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
+ auto *MFI = MF.getFrameInfo();
+ assert(MFI->getCalleeSavedInfo().empty() &&
+ "WebAssembly should not have callee-saved registers");
+ assert(!hasFP(MF) && "Functions needing frame pointers not yet supported");
+ uint64_t StackSize = MFI->getStackSize();
+ if (!StackSize && (!MFI->adjustsStack() || MFI->getMaxCallFrameSize() == 0))
+ return;
+
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+
+ auto InsertPt = MBB.begin();
+ DebugLoc DL;
+
+ adjustStackPointer(StackSize, false, MF, MBB, TII, InsertPt, DL);
}
void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- llvm_unreachable("TODO: implement emitEpilogue");
-}
+ uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+ if (!StackSize)
+ return;
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+ auto &MRI = MF.getRegInfo();
+ unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ auto InsertPt = MBB.getFirstTerminator();
+ DebugLoc DL;
+
+ if (InsertPt != MBB.end()) {
+ DL = InsertPt->getDebugLoc();
+ }
-void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan(
-    MachineFunction &MF, RegScavenger *RS) const {
-  llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan");
+  // Restore the stack pointer. Without FP, the current SP32 is the old stack
+  // pointer minus StackSize, so adding StackSize back restores the old value.
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(StackSize);
+ auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32)
+ .addReg(WebAssembly::SP32)
+ .addReg(OffsetReg);
+ // Re-use OffsetReg to hold the address of the stacktop
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addExternalSymbol(SPSymbol);
+ auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, 4, 4);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+ .addImm(0)
+ .addReg(OffsetReg)
+ .addReg(WebAssembly::SP32)
+ .addMemOperand(MMO);
}
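
adjustStackPointer above materializes the address of __stack_pointer, loads the current value, adds or subtracts the frame size, and writes the new top back while leaving it in SP32. Below is a plain C++ sketch of that load/adjust/store sequence, with a single global word standing in for the __stack_pointer cell in linear memory.

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the __stack_pointer word in linear memory.
static uint32_t StackPointerCell = 0x10000;

// Mirrors adjustStackPointer: load the stack pointer, add or subtract the
// frame size, write the new top back, and leave it in a "register".
static uint32_t adjustStack(uint32_t StackSize, bool AdjustUp) {
  uint32_t SP = StackPointerCell;                              // LOAD_I32
  uint32_t NewSP = AdjustUp ? SP + StackSize : SP - StackSize; // ADD/SUB_I32
  StackPointerCell = NewSP;                                    // STORE_I32
  return NewSP;                                                // lives in SP32
}

int main() {
  uint32_t InFrame = adjustStack(64, /*AdjustUp=*/false);      // prologue
  uint32_t Restored = adjustStack(64, /*AdjustUp=*/true);      // epilogue
  std::cout << std::hex << InFrame << ' ' << Restored << '\n'; // ffc0 10000
}
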
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index 0b112d0..5f4708f 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -38,9 +38,6 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
new file mode 100644
index 0000000..3a03fa5
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -0,0 +1,25 @@
+//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file describes the various WebAssembly ISD node types.
+///
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+HANDLE_NODETYPE(CALL1)
+HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(RETURN)
+HANDLE_NODETYPE(ARGUMENT)
+HANDLE_NODETYPE(Wrapper)
+HANDLE_NODETYPE(BR_IF)
+HANDLE_NODETYPE(TABLESWITCH)
+
+// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
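
WebAssemblyISD.def is meant to be consumed with the X-macro idiom: each includer defines HANDLE_NODETYPE to expand the entries however it needs, includes the .def file, and undefines the macro again (getTargetNodeName later in this diff does exactly that to build the name table). Below is a minimal self-contained sketch of the idiom, with the node list inlined as a NODE_LIST macro rather than a separate .def file.

#include <iostream>

// Stand-in for the .def file: the entries would normally live in their own
// file and be pulled in with #include "WebAssemblyISD.def".
#define NODE_LIST(X) X(CALL1) X(RETURN) X(BR_IF)

// Expansion #1: build an enum of node types.
enum NodeType {
#define HANDLE_NODETYPE(NODE) NODE,
  NODE_LIST(HANDLE_NODETYPE)
#undef HANDLE_NODETYPE
};

// Expansion #2: map each node type back to its name, as getTargetNodeName does.
static const char *nodeName(NodeType N) {
  switch (N) {
#define HANDLE_NODETYPE(NODE)                                                  \
  case NODE:                                                                   \
    return #NODE;
    NODE_LIST(HANDLE_NODETYPE)
#undef HANDLE_NODETYPE
  }
  return nullptr;
}

int main() { std::cout << nodeName(BR_IF) << '\n'; } // prints BR_IF
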
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 518ef33..8390f79 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -56,13 +56,68 @@ public:
SDNode *Select(SDNode *Node) override;
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
+
+// Include the pieces autogenerated from the target description.
+#include "WebAssemblyGenDAGISel.inc"
+
private:
// add select functions here...
};
} // end anonymous namespace
SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
- llvm_unreachable("TODO: implement Select");
+ // Dump information about the Node being selected.
+ DEBUG(errs() << "Selecting: ");
+ DEBUG(Node->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ Node->setNodeId(-1);
+ return nullptr;
+ }
+
+  // Handle a few custom selection cases here.
+ SDNode *ResNode = nullptr;
+ EVT VT = Node->getValueType(0);
+
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ // If we need WebAssembly-specific selection, it would go here.
+ (void)VT;
+ }
+
+ // Select the default instruction.
+ ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == nullptr || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+
+ return ResNode;
+}
+
+bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand(
+    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+  switch (ConstraintID) {
+  case InlineAsm::Constraint_i:
+  case InlineAsm::Constraint_m:
+    // We only support simple memory operands that have a single address
+    // operand and need no special handling.
+ OutOps.push_back(Op);
+ return false;
+ default:
+ break;
+ }
+
+ return true;
}
/// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4184eb6..7a89f78 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -17,10 +17,13 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
-#include "WebAssemblyTargetObjectFile.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
@@ -32,14 +35,254 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-lower"
+namespace {
+// Diagnostic information for unimplemented or unsupported feature reporting.
+// TODO: This code is copied from BPF and AMDGPU; consider factoring it out
+// and sharing code.
+class DiagnosticInfoUnsupported final : public DiagnosticInfo {
+private:
+ // Debug location where this diagnostic is triggered.
+ DebugLoc DLoc;
+ const Twine &Description;
+ const Function &Fn;
+ SDValue Value;
+
+ static int KindID;
+
+ static int getKindID() {
+ if (KindID == 0)
+ KindID = llvm::getNextAvailablePluginDiagnosticKind();
+ return KindID;
+ }
+
+public:
+ DiagnosticInfoUnsupported(SDLoc DLoc, const Function &Fn, const Twine &Desc,
+ SDValue Value)
+ : DiagnosticInfo(getKindID(), DS_Error), DLoc(DLoc.getDebugLoc()),
+ Description(Desc), Fn(Fn), Value(Value) {}
+
+ void print(DiagnosticPrinter &DP) const override {
+ std::string Str;
+ raw_string_ostream OS(Str);
+
+ if (DLoc) {
+ auto DIL = DLoc.get();
+ StringRef Filename = DIL->getFilename();
+ unsigned Line = DIL->getLine();
+ unsigned Column = DIL->getColumn();
+ OS << Filename << ':' << Line << ':' << Column << ' ';
+ }
+
+ OS << "in function " << Fn.getName() << ' ' << *Fn.getFunctionType() << '\n'
+ << Description;
+ if (Value)
+ Value->print(OS);
+ OS << '\n';
+ OS.flush();
+ DP << Str;
+ }
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == getKindID();
+ }
+};
+
+int DiagnosticInfoUnsupported::KindID = 0;
+} // end anonymous namespace
+
WebAssemblyTargetLowering::WebAssemblyTargetLowering(
const TargetMachine &TM, const WebAssemblySubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
+ auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
+
+ // Booleans always contain 0 or 1.
+ setBooleanContents(ZeroOrOneBooleanContent);
// WebAssembly does not produce floating-point exceptions on normal floating
// point operations.
setHasFloatingPointExceptions(false);
// We don't know the microarchitecture here, so just reduce register pressure.
setSchedulingPreference(Sched::RegPressure);
+ // Tell ISel that we have a stack pointer.
+ setStackPointerRegisterToSaveRestore(
+ Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
+ addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
+ addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
+ addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
+ // Compute derived properties from the register classes.
+ computeRegisterProperties(Subtarget->getRegisterInfo());
+
+ setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
+ setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
+ setOperationAction(ISD::JumpTable, MVTPtr, Custom);
+
+ // Take the default expansion for va_arg, va_copy, and va_end. There is no
+ // default action for va_start, so we do that custom.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+ for (auto T : {MVT::f32, MVT::f64}) {
+ // Don't expand the floating-point types to constant pools.
+ setOperationAction(ISD::ConstantFP, T, Legal);
+ // Expand floating-point comparisons.
+ for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
+ ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
+ setCondCodeAction(CC, T, Expand);
+ // Expand floating-point library function operators.
+ for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW,
+ ISD::FREM, ISD::FMA})
+ setOperationAction(Op, T, Expand);
+    // Mark as Legal the floating-point library function operators that are
+    // natively supported and would otherwise default to Expand.
+ for (auto Op :
+ {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
+ setOperationAction(Op, T, Legal);
+ // Support minnan and maxnan, which otherwise default to expand.
+ setOperationAction(ISD::FMINNAN, T, Legal);
+ setOperationAction(ISD::FMAXNAN, T, Legal);
+ }
+
+ for (auto T : {MVT::i32, MVT::i64}) {
+ // Expand unavailable integer operations.
+ for (auto Op :
+ {ISD::BSWAP, ISD::ROTL, ISD::ROTR, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
+ ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
+ ISD::SUBE}) {
+ setOperationAction(Op, T, Expand);
+ }
+ }
+
+ // As a special case, these operators use the type to mean the type to
+ // sign-extend from.
+ for (auto T : {MVT::i1, MVT::i8, MVT::i16, MVT::i32})
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
+
+ // Dynamic stack allocation: use the default expansion.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
+
+ setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+ // Expand these forms; we pattern-match the forms that we can handle in isel.
+ for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
+ for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
+ setOperationAction(Op, T, Expand);
+
+ // We have custom switch handling.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+ // WebAssembly doesn't have:
+ // - Floating-point extending loads.
+ // - Floating-point truncating stores.
+ // - i1 extending loads.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ for (auto T : MVT::integer_valuetypes())
+ for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
+ setLoadExtAction(Ext, T, MVT::i1, Promote);
+
+ // Trap lowers to wasm unreachable
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+}
+
+FastISel *WebAssemblyTargetLowering::createFastISel(
+ FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
+ return WebAssembly::createFastISel(FuncInfo, LibInfo);
+}
+
+bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode * /*GA*/) const {
+ // All offsets can be folded.
+ return true;
+}
+
+MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
+ EVT VT) const {
+ unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
+ if (BitWidth > 1 && BitWidth < 8)
+ BitWidth = 8;
+
+ if (BitWidth > 64) {
+ BitWidth = 64;
+ assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
+ "64-bit shift counts ought to be enough for anyone");
+ }
+
+ MVT Result = MVT::getIntegerVT(BitWidth);
+ assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+ "Unable to represent scalar shift amount type");
+ return Result;
+}
+
+const char *
+WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
+ case WebAssemblyISD::FIRST_NUMBER:
+ break;
+#define HANDLE_NODETYPE(NODE) \
+ case WebAssemblyISD::NODE: \
+ return "WebAssemblyISD::" #NODE;
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
+ }
+ return nullptr;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+ // First, see if this is a constraint that directly corresponds to a
+ // WebAssembly register class.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+ if (VT.isInteger() && !VT.isVector()) {
+ if (VT.getSizeInBits() <= 32)
+ return std::make_pair(0U, &WebAssembly::I32RegClass);
+ if (VT.getSizeInBits() <= 64)
+ return std::make_pair(0U, &WebAssembly::I64RegClass);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
+ // Assume ctz is a relatively cheap operation.
+ return true;
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
+ // Assume clz is a relatively cheap operation.
+ return true;
+}
+
+bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM,
+ Type *Ty,
+ unsigned AS) const {
+ // WebAssembly offsets are added as unsigned without wrapping. The
+  // isLegalAddressingMode hook gives us no way to determine whether wrapping
+  // could occur, so we approximate this by accepting only non-negative offsets.
+ if (AM.BaseOffs < 0)
+ return false;
+
+ // WebAssembly has no scale register operands.
+ if (AM.Scale != 0)
+ return false;
+
+ // Everything else is legal.
+ return true;
}
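
The accept/reject rule above reduces to a two-line predicate; a small sketch (plain C++, not the TargetLowering::AddrMode structure) of which base + scale*index + offset forms are kept:

    #include <cassert>
    #include <cstdint>

    // Mirrors the checks above: reject negative base offsets (wasm offsets are
    // unsigned) and any scaled-index form (wasm addressing is base + offset only).
    static bool isLegalWasmAddrMode(int64_t BaseOffs, int64_t Scale) {
      return BaseOffs >= 0 && Scale == 0;
    }

    int main() {
      assert(isLegalWasmAddrMode(8, 0));   // base + 8: folds into a load/store offset
      assert(!isLegalWasmAddrMode(-4, 0)); // base - 4: could wrap, rejected
      assert(!isLegalWasmAddrMode(0, 4));  // base + 4*index: no scaled addressing
      return 0;
    }
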
//===----------------------------------------------------------------------===//
@@ -50,16 +293,359 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Lowering Code
//===----------------------------------------------------------------------===//
+static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ DAG.getContext()->diagnose(
+ DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue()));
+}
+
+// Test whether the given calling convention is supported.
+static bool CallingConvSupported(CallingConv::ID CallConv) {
+  // We currently support the language-independent, target-independent
+ // conventions. We don't yet have a way to annotate calls with properties like
+ // "cold", and we don't have any call-clobbered registers, so these are mostly
+ // all handled the same.
+ return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
+ CallConv == CallingConv::Cold ||
+ CallConv == CallingConv::PreserveMost ||
+ CallConv == CallingConv::PreserveAll ||
+ CallConv == CallingConv::CXX_FAST_TLS;
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc DL = CLI.DL;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ CallingConv::ID CallConv = CLI.CallConv;
+ if (!CallingConvSupported(CallConv))
+ fail(DL, DAG,
+ "WebAssembly doesn't support language-specific or target-specific "
+ "calling conventions yet");
+ if (CLI.IsPatchPoint)
+    fail(DL, DAG, "WebAssembly doesn't support patch points yet");
+
+ // WebAssembly doesn't currently support explicit tail calls. If they are
+ // required, fail. Otherwise, just disable them.
+ if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
+ MF.getTarget().Options.GuaranteedTailCallOpt) ||
+ (CLI.CS && CLI.CS->isMustTailCall()))
+    fail(DL, DAG, "WebAssembly doesn't support tail calls yet");
+ CLI.IsTailCall = false;
+
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ if (Ins.size() > 1)
+ fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");
+
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ for (const ISD::OutputArg &Out : Outs) {
+ if (Out.Flags.isByVal())
+ fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+ if (Out.Flags.isNest())
+ fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+ if (Out.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+ if (Out.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+ if (Out.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+ }
+
+ bool IsVarArg = CLI.IsVarArg;
+ unsigned NumFixedArgs = CLI.NumFixedArgs;
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ if (IsVarArg) {
+ // Outgoing non-fixed arguments are placed at the top of the stack. First
+ // compute their offsets and the total amount of argument stack space
+ // needed.
+ for (SDValue Arg :
+ make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+ EVT VT = Arg.getValueType();
+ assert(VT != MVT::iPTR && "Legalized args should be concrete");
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ unsigned Offset =
+ CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty),
+ MF.getDataLayout().getABITypeAlignment(Ty));
+ CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
+ Offset, VT.getSimpleVT(),
+ CCValAssign::Full));
+ }
+ }
+
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
+
+ SDValue NB;
+ if (NumBytes) {
+ NB = DAG.getConstant(NumBytes, DL, PtrVT, true);
+ Chain = DAG.getCALLSEQ_START(Chain, NB, DL);
+ }
+
+ if (IsVarArg) {
+    // Next, emit stores of the non-fixed argument values to the stack at the
+    // offsets computed above.
+ SDValue SP = DAG.getCopyFromReg(
+ Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT);
+ unsigned ValNo = 0;
+ SmallVector<SDValue, 8> Chains;
+ for (SDValue Arg :
+ make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+ assert(ArgLocs[ValNo].getValNo() == ValNo &&
+ "ArgLocs should remain in order and only hold varargs args");
+ unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
+ SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP,
+ DAG.getConstant(Offset, DL, PtrVT));
+ Chains.push_back(DAG.getStore(Chain, DL, Arg, Add,
+ MachinePointerInfo::getStack(MF, Offset),
+ false, false, 0));
+ }
+ if (!Chains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ // Compute the operands for the CALLn node.
+ SmallVector<SDValue, 16> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
+ // isn't reliable.
+ Ops.append(OutVals.begin(),
+ IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
+
+ SmallVector<EVT, 8> Tys;
+ for (const auto &In : Ins) {
+ assert(!In.Flags.isByVal() && "byval is not valid for return values");
+ assert(!In.Flags.isNest() && "nest is not valid for return values");
+ if (In.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
+ if (In.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
+ if (In.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG,
+ "WebAssembly hasn't implemented cons regs last return values");
+ // Ignore In.getOrigAlign() because all our arguments are passed in
+ // registers.
+ Tys.push_back(In.VT);
+ }
+ Tys.push_back(MVT::Other);
+ SDVTList TyList = DAG.getVTList(Tys);
+ SDValue Res =
+ DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
+ DL, TyList, Ops);
+ if (Ins.empty()) {
+ Chain = Res;
+ } else {
+ InVals.push_back(Res);
+ Chain = Res.getValue(1);
+ }
+
+ if (NumBytes) {
+ SDValue Unused = DAG.getTargetConstant(0, DL, PtrVT);
+ Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL);
+ }
+
+ return Chain;
+}
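
The varargs path in LowerCall above assigns each non-fixed argument a stack offset by bumping a running total up to that argument's alignment. A standalone sketch of that bookkeeping (plain C++; the argument list and its sizes/alignments are made up for illustration, and the real code goes through CCState::AllocateStack):

    #include <cassert>
    #include <cstdint>

    // Mimics the stack allocation used above: round the running offset up to
    // the argument's alignment, record it, then advance by the size.
    struct OutgoingVarArgArea {
      uint64_t NextOffset = 0;
      uint64_t allocate(uint64_t Size, uint64_t Align) {
        NextOffset = (NextOffset + Align - 1) / Align * Align;
        uint64_t Offset = NextOffset;
        NextOffset += Size;
        return Offset;
      }
    };

    int main() {
      // e.g. printf("%d %f %d", i, d, j): the non-fixed arguments are
      // (i32, f64, i32) with (size, align) = (4,4), (8,8), (4,4).
      OutgoingVarArgArea Area;
      assert(Area.allocate(4, 4) == 0);  // i32 at offset 0
      assert(Area.allocate(8, 8) == 8);  // f64 padded up to offset 8
      assert(Area.allocate(4, 4) == 16); // i32 at offset 16
      assert(Area.NextOffset == 20);     // varargs area size before final alignment
      return 0;
    }
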
+
+bool WebAssemblyTargetLowering::CanLowerReturn(
+ CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext & /*Context*/) const {
+ // WebAssembly can't currently handle returning tuples.
+ return Outs.size() <= 1;
+}
+
+SDValue WebAssemblyTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
+ SelectionDAG &DAG) const {
+ assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
+ if (!CallingConvSupported(CallConv))
+ fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ RetOps.append(OutVals.begin(), OutVals.end());
+ Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
+
+ // Record the number and types of the return values.
+ for (const ISD::OutputArg &Out : Outs) {
+ assert(!Out.Flags.isByVal() && "byval is not valid for return values");
+ assert(!Out.Flags.isNest() && "nest is not valid for return values");
+ assert(Out.IsFixed && "non-fixed return value is not valid");
+ if (Out.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
+ if (Out.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
+ if (Out.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
+ }
+
+ return Chain;
+}
+
+SDValue WebAssemblyTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ if (!CallingConvSupported(CallConv))
+ fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+ // Set up the incoming ARGUMENTS value, which serves to represent the liveness
+ // of the incoming values before they're represented by virtual registers.
+ MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
+
+ for (const ISD::InputArg &In : Ins) {
+ if (In.Flags.isByVal())
+ fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+ if (In.Flags.isInAlloca())
+ fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+ if (In.Flags.isNest())
+ fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+ if (In.Flags.isInConsecutiveRegs())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+ if (In.Flags.isInConsecutiveRegsLast())
+ fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+ // Ignore In.getOrigAlign() because all our arguments are passed in
+ // registers.
+ InVals.push_back(
+ In.Used
+ ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
+ DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
+ : DAG.getUNDEF(In.VT));
+
+ // Record the number and types of arguments.
+ MF.getInfo<WebAssemblyFunctionInfo>()->addParam(In.VT);
+ }
+
+ // Incoming varargs arguments are on the stack and will be accessed through
+ // va_arg, so we don't need to do anything for them here.
+
+ return Chain;
+}
+
//===----------------------------------------------------------------------===//
-// Other Lowering Code
+// Custom lowering hooks.
//===----------------------------------------------------------------------===//
+SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("unimplemented operation lowering");
+ return SDValue();
+ case ISD::FrameIndex:
+ return LowerFrameIndex(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::ExternalSymbol:
+ return LowerExternalSymbol(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ }
+}
+
+SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
+ SelectionDAG &DAG) const {
+ int FI = cast<FrameIndexSDNode>(Op)->getIndex();
+ return DAG.getTargetFrameIndex(FI, Op.getValueType());
+}
+
+SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const auto *GA = cast<GlobalAddressSDNode>(Op);
+ EVT VT = Op.getValueType();
+ assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ if (GA->getAddressSpace() != 0)
+ fail(DL, DAG, "WebAssembly only expects the 0 address space");
+ return DAG.getNode(
+ WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ const auto *ES = cast<ExternalSymbolSDNode>(Op);
+ EVT VT = Op.getValueType();
+ assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
+}
+
+SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ // There's no need for a Wrapper node because we always incorporate a jump
+ // table operand into a TABLESWITCH instruction, rather than ever
+ // materializing it in a register.
+ const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
+ JT->getTargetFlags());
+}
+
+SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
+ SDValue Index = Op.getOperand(2);
+ assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Index);
+
+ MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
+ const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
+
+  // TODO: For now, we just pick something arbitrary for the default case. We
+  // really want to sniff out the guard and put in the real default case (and
+ // delete the guard).
+ Ops.push_back(DAG.getBasicBlock(MBBs[0]));
+
+ // Add an operand for each case.
+ for (auto MBB : MBBs)
+ Ops.push_back(DAG.getBasicBlock(MBB));
+
+ return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops);
+}
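
For reference, the operand list built above for a three-case jump table comes out as [chain, index, default, case0, case1, case2], with the default currently duplicating the first case as the TODO notes. A small sketch of that layout (plain C++ with made-up block names, not the SelectionDAG API):

    #include <cassert>
    #include <string>
    #include <vector>

    // Builds the TABLESWITCH operand list the way LowerBR_JT does (the chain
    // operand is omitted here): [index, default, case0, case1, ...].
    static std::vector<std::string>
    tableswitchOps(const std::string &Index, const std::vector<std::string> &MBBs) {
      std::vector<std::string> Ops;
      Ops.push_back(Index);
      Ops.push_back(MBBs[0]); // placeholder default, per the TODO above
      for (const std::string &MBB : MBBs)
        Ops.push_back(MBB);
      return Ops;
    }

    int main() {
      std::vector<std::string> Ops = tableswitchOps("%idx", {"bb.1", "bb.2", "bb.3"});
      assert(Ops.size() == 5);                      // index + default + 3 cases
      assert(Ops[1] == "bb.1" && Ops[2] == "bb.1"); // default duplicates case 0
      return 0;
    }
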
+
+SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
+
+ // The incoming non-fixed arguments are placed on the top of the stack, with
+ // natural alignment, at the point of the call, so the base pointer is just
+ // the current frame pointer.
+ DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true);
+ unsigned FP =
+ Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
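
At the source level, the single store emitted above is all that va_start needs here: the va_list receives the current frame pointer, because the caller left the non-fixed arguments directly above it (see LowerCall). A minimal usage sketch in ordinary C++ varargs code, nothing target-specific:

    #include <cassert>
    #include <cstdarg>

    // sum(n, ...) walks n trailing ints; under the lowering above, the
    // va_start(ap, n) call boils down to one store of the frame pointer into ap.
    static int sum(int n, ...) {
      va_list ap;
      va_start(ap, n);
      int total = 0;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, int);
      va_end(ap);
      return total;
    }

    int main() {
      assert(sum(3, 1, 2, 3) == 6);
      return 0;
    }
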
+
//===----------------------------------------------------------------------===//
// WebAssembly Optimization Hooks
//===----------------------------------------------------------------------===//
-
-MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- return getDataSection();
-}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index efd60a7..e7232a0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -22,10 +22,11 @@ namespace llvm {
namespace WebAssemblyISD {
-enum {
+enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
+#define HANDLE_NODETYPE(NODE) NODE,
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
};
} // end namespace WebAssemblyISD
@@ -42,8 +43,51 @@ private:
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
/// right decision when generating code for different targets.
const WebAssemblySubtarget *Subtarget;
+
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
+ const char *getTargetNodeName(unsigned Opcode) const override;
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint, MVT VT) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
+
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+ SelectionDAG &DAG) const override;
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ // Custom lowering hooks.
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
};
+namespace WebAssembly {
+FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
+} // end namespace WebAssembly
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 6b5b6cd..cfa1519 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -12,10 +12,63 @@
///
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * call_direct: call function directly
- * call_indirect: call function indirectly
- * addressof: obtain a function pointer value for a given function
- */
+// TODO: addr64: These currently assume the callee address is 32-bit.
+
+let Defs = [ARGUMENTS] in {
+
+// Call sequence markers. Their immediate operand represents the amount of
+// stack space to allocate or free, and is used for varargs lowering.
+let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in {
+def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt),
+ [(WebAssemblycallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2),
+ [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
+} // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1
+
+multiclass CALL<WebAssemblyRegClass vt, string prefix> {
+ def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops),
+ [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
+ !strconcat(prefix, "call\t$dst, $callee")>;
+ def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
+ [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
+ !strconcat(prefix, "call_indirect\t$dst, $callee")>;
+}
+let Uses = [SP32, SP64], isCall = 1 in {
+ defm : CALL<I32, "i32.">;
+ defm : CALL<I64, "i64.">;
+ defm : CALL<F32, "f32.">;
+ defm : CALL<F64, "f64.">;
+
+ def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops),
+ [(WebAssemblycall0 (i32 imm:$callee))],
+ "call \t$callee">;
+ def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
+ [(WebAssemblycall0 I32:$callee)],
+ "call_indirect\t$callee">;
+} // Uses = [SP32,SP64], isCall = 1
+
+} // Defs = [ARGUMENTS]
+
+// Patterns for matching a direct call to a global address.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_I32 tglobaladdr:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_I64 tglobaladdr:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_F32 tglobaladdr:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_F64 tglobaladdr:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
+ (CALL_VOID tglobaladdr:$callee)>;
+
+// Patterns for matching a direct call to an external symbol.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_I32 texternalsym:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_I64 texternalsym:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_F32 texternalsym:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_F64 texternalsym:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
+ (CALL_VOID texternalsym:$callee)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
new file mode 100644
index 0000000..05efe89
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -0,0 +1,82 @@
+//===- WebAssemblyInstrControl.td - WebAssembly control-flow ---*- tablegen -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly control-flow code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+let Defs = [ARGUMENTS] in {
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+// The condition operand is a boolean value which WebAssembly represents as i32.
+def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst),
+ [(brcond I32:$cond, bb:$dst)],
+ "br_if \t$cond, $dst">;
+let isCodeGenOnly = 1 in
+def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [],
+ "br_unless\t$cond, $dst">;
+let isBarrier = 1 in {
+def BR : I<(outs), (ins bb_op:$dst),
+ [(br bb:$dst)],
+ "br \t$dst">;
+} // isBarrier = 1
+} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
+ (BR_IF I32:$cond, bb_op:$dst)>;
+def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
+ (BR_UNLESS I32:$cond, bb_op:$dst)>;
+
+let Defs = [ARGUMENTS] in {
+
+// TODO: SelectionDAG's lowering insists on using a pointer as the index for
+// jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode
+// currently.
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops),
+ [(WebAssemblytableswitch I32:$index, bb:$default)],
+ "tableswitch\t$index, $default">;
+def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops),
+ [(WebAssemblytableswitch I64:$index, bb:$default)],
+ "tableswitch\t$index, $default">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+// Placemarkers to indicate the start of a block or loop scope. These
+// use/clobber EXPR_STACK to prevent them from being moved into the middle of
+// an expression tree.
+let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in {
+def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">;
+def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">;
+} // Uses = [EXPR_STACK], Defs = [EXPR_STACK]
+
+// No-op to indicate to the AsmPrinter that a loop ends here, so that a
+// basic block label is emitted even if it wouldn't otherwise appear to be
+// needed.
+let isTerminator = 1, hasCtrlDep = 1 in
+def LOOP_END : I<(outs), (ins), []>;
+
+multiclass RETURN<WebAssemblyRegClass vt> {
+ def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)],
+ "return \t$val">;
+}
+
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+let isReturn = 1 in {
+ defm : RETURN<I32>;
+ defm : RETURN<I64>;
+ defm : RETURN<F32>;
+ defm : RETURN<F64>;
+ def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">;
+} // isReturn = 1
+ def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index 3fa2906..931f4a9 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -13,32 +13,99 @@
///
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer
- * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer
- * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer
- * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer
- * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer
- * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer
- * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer
- * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer
- * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer
- * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer
- * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer
- * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer
- * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer
- * float32.demote[float64]: demote a 64-bit float to a 32-bit float
- * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float
- * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float
- * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float
- * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float
- * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float
- * float64.promote[float32]: promote a 32-bit float to a 64-bit float
- * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float
- * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float
- * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float
- * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float
- * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float
- */
+let Defs = [ARGUMENTS] in {
+
+def I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src),
+ [(set I32:$dst, (trunc I64:$src))],
+ "i32.wrap/i64\t$dst, $src">;
+
+def I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src),
+ [(set I64:$dst, (sext I32:$src))],
+ "i64.extend_s/i32\t$dst, $src">;
+def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src),
+ [(set I64:$dst, (zext I32:$src))],
+ "i64.extend_u/i32\t$dst, $src">;
+
+} // Defs = [ARGUMENTS]
+
+// Expand a "don't care" extend into zero-extend (chosen over sign-extend
+// somewhat arbitrarily, although it favors popular hardware architectures
+// and is conceptually a simpler operation).
+def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Conversions from floating point to integer trap on overflow and on
+// invalid (NaN) inputs.
+let hasSideEffects = 1 in {
+def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src),
+ [(set I32:$dst, (fp_to_sint F32:$src))],
+ "i32.trunc_s/f32\t$dst, $src">;
+def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src),
+ [(set I32:$dst, (fp_to_uint F32:$src))],
+ "i32.trunc_u/f32\t$dst, $src">;
+def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src),
+ [(set I64:$dst, (fp_to_sint F32:$src))],
+ "i64.trunc_s/f32\t$dst, $src">;
+def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src),
+ [(set I64:$dst, (fp_to_uint F32:$src))],
+ "i64.trunc_u/f32\t$dst, $src">;
+def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src),
+ [(set I32:$dst, (fp_to_sint F64:$src))],
+ "i32.trunc_s/f64\t$dst, $src">;
+def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src),
+ [(set I32:$dst, (fp_to_uint F64:$src))],
+ "i32.trunc_u/f64\t$dst, $src">;
+def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src),
+ [(set I64:$dst, (fp_to_sint F64:$src))],
+ "i64.trunc_s/f64\t$dst, $src">;
+def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src),
+ [(set I64:$dst, (fp_to_uint F64:$src))],
+ "i64.trunc_u/f64\t$dst, $src">;
+} // hasSideEffects = 1
+
+def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src),
+ [(set F32:$dst, (sint_to_fp I32:$src))],
+ "f32.convert_s/i32\t$dst, $src">;
+def F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src),
+ [(set F32:$dst, (uint_to_fp I32:$src))],
+ "f32.convert_u/i32\t$dst, $src">;
+def F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src),
+ [(set F64:$dst, (sint_to_fp I32:$src))],
+ "f64.convert_s/i32\t$dst, $src">;
+def F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src),
+ [(set F64:$dst, (uint_to_fp I32:$src))],
+ "f64.convert_u/i32\t$dst, $src">;
+def F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src),
+ [(set F32:$dst, (sint_to_fp I64:$src))],
+ "f32.convert_s/i64\t$dst, $src">;
+def F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src),
+ [(set F32:$dst, (uint_to_fp I64:$src))],
+ "f32.convert_u/i64\t$dst, $src">;
+def F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src),
+ [(set F64:$dst, (sint_to_fp I64:$src))],
+ "f64.convert_s/i64\t$dst, $src">;
+def F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src),
+ [(set F64:$dst, (uint_to_fp I64:$src))],
+ "f64.convert_u/i64\t$dst, $src">;
+
+def F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src),
+ [(set F64:$dst, (fextend F32:$src))],
+ "f64.promote/f32\t$dst, $src">;
+def F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src),
+ [(set F32:$dst, (fround F64:$src))],
+ "f32.demote/f64\t$dst, $src">;
+
+def I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src),
+ [(set I32:$dst, (bitconvert F32:$src))],
+ "i32.reinterpret/f32\t$dst, $src">;
+def F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src),
+ [(set F32:$dst, (bitconvert I32:$src))],
+ "f32.reinterpret/i32\t$dst, $src">;
+def I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src),
+ [(set I64:$dst, (bitconvert F64:$src))],
+ "i64.reinterpret/f64\t$dst, $src">;
+def F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src),
+ [(set F64:$dst, (bitconvert I64:$src))],
+ "f64.reinterpret/i64\t$dst, $src">;
+
+} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 30ef633..5520c6d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -12,33 +12,90 @@
///
//===----------------------------------------------------------------------===//
-defm FADD : BinaryFP<fadd>;
-defm FSUB : BinaryFP<fsub>;
-defm FMUL : BinaryFP<fmul>;
-defm FDIV : BinaryFP<fdiv>;
-defm FABS : UnaryFP<fabs>;
-defm FNEG : UnaryFP<fneg>;
-defm COPYSIGN : BinaryFP<fcopysign>;
-defm CEIL : UnaryFP<fceil>;
-defm FLOOR : UnaryFP<ffloor>;
-defm TRUNC : UnaryFP<ftrunc>;
-defm NEARESTINT : UnaryFP<fnearbyint>;
-
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.eq: compare equal
- * float32.lt: less than
- * float32.le: less than or equal
- * float32.gt: greater than
- * float32.ge: greater than or equal
- */
-
-defm SQRT : UnaryFP<fsqrt>;
-
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.min: minimum (binary operator); if either operand is NaN, returns NaN
- * float32.max: maximum (binary operator); if either operand is NaN, returns NaN
- */
+let Defs = [ARGUMENTS] in {
+
+let isCommutable = 1 in
+defm ADD : BinaryFP<fadd, "add ">;
+defm SUB : BinaryFP<fsub, "sub ">;
+let isCommutable = 1 in
+defm MUL : BinaryFP<fmul, "mul ">;
+defm DIV : BinaryFP<fdiv, "div ">;
+defm SQRT : UnaryFP<fsqrt, "sqrt">;
+
+defm ABS : UnaryFP<fabs, "abs ">;
+defm NEG : UnaryFP<fneg, "neg ">;
+defm COPYSIGN : BinaryFP<fcopysign, "copysign">;
+
+let isCommutable = 1 in {
+defm MIN : BinaryFP<fminnan, "min ">;
+defm MAX : BinaryFP<fmaxnan, "max ">;
+} // isCommutable = 1
+
+defm CEIL : UnaryFP<fceil, "ceil">;
+defm FLOOR : UnaryFP<ffloor, "floor">;
+defm TRUNC : UnaryFP<ftrunc, "trunc">;
+defm NEAREST : UnaryFP<fnearbyint, "nearest">;
+
+} // Defs = [ARGUMENTS]
+
+// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
+def : Pat<(fcopysign F64:$lhs, F32:$rhs),
+ (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
+def : Pat<(fcopysign F32:$lhs, F64:$rhs),
+ (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>;
+
+// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
+def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
+def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+let isCommutable = 1 in {
+defm EQ : ComparisonFP<SETOEQ, "eq ">;
+defm NE : ComparisonFP<SETUNE, "ne ">;
+} // isCommutable = 1
+defm LT : ComparisonFP<SETOLT, "lt ">;
+defm LE : ComparisonFP<SETOLE, "le ">;
+defm GT : ComparisonFP<SETOGT, "gt ">;
+defm GE : ComparisonFP<SETOGE, "ge ">;
+
+} // Defs = [ARGUMENTS]
+
+// "Don't care" floating-point comparisons (no NaN-ordering requirement),
+// supported via the ordered/unordered comparisons defined above.
+def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setlt f32:$lhs, f32:$rhs), (LT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setle f32:$lhs, f32:$rhs), (LE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setgt f32:$lhs, f32:$rhs), (GT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setge f32:$lhs, f32:$rhs), (GE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(seteq f64:$lhs, f64:$rhs), (EQ_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setne f64:$lhs, f64:$rhs), (NE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setlt f64:$lhs, f64:$rhs), (LT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_F32 : I<(outs F32:$dst), (ins I32:$cond, F32:$lhs, F32:$rhs),
+ [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
+ "f32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_F64 : I<(outs F64:$dst), (ins I32:$cond, F64:$lhs, F64:$rhs),
+ [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
+ "f64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), F32:$lhs, F32:$rhs),
+ (SELECT_F32 I32:$cond, F32:$lhs, F32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), F64:$lhs, F64:$rhs),
+ (SELECT_F64 I32:$cond, F64:$lhs, F64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs),
+ (SELECT_F32 I32:$cond, F32:$rhs, F32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs),
+ (SELECT_F64 I32:$cond, F64:$rhs, F64:$lhs)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 513c36f..8008dd3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -1,4 +1,4 @@
-// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-//
+//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -12,44 +12,68 @@
///
//===----------------------------------------------------------------------===//
-// WebAssembly Instruction Format
-class WebAssemblyInst<string cstr> : Instruction {
+// WebAssembly Instruction Format.
+class WebAssemblyInst<string asmstr> : Instruction {
field bits<0> Inst; // Instruction encoding.
let Namespace = "WebAssembly";
let Pattern = [];
- let Constraints = cstr;
+ let AsmString = asmstr;
}
-// Normal instructions
-class I<dag oops, dag iops, list<dag> pattern, string cstr = "">
- : WebAssemblyInst<cstr> {
+// Normal instructions.
+class I<dag oops, dag iops, list<dag> pattern, string asmstr = "">
+ : WebAssemblyInst<asmstr> {
dag OutOperandList = oops;
dag InOperandList = iops;
let Pattern = pattern;
}
// Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$src),
- [(set Int32:$dst, (node Int32:$src))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$src),
- [(set Int64:$dst, (node Int64:$src))]>;
-}
-multiclass BinaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs),
- [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs),
- [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>;
-}
-multiclass UnaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$src),
- [(set Float32:$dst, (node Float32:$src))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$src),
- [(set Float64:$dst, (node Float64:$src))]>;
-}
-multiclass BinaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs),
- [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs),
- [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>;
+multiclass UnaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$src),
+ [(set I32:$dst, (node I32:$src))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$src),
+ [(set I64:$dst, (node I64:$src))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src"))>;
+}
+multiclass BinaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass UnaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$src),
+ [(set F32:$dst, (node F32:$src))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$src),
+ [(set F64:$dst, (node F64:$src))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src"))>;
+}
+multiclass BinaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass ComparisonInt<CondCode cond, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass ComparisonFP<CondCode cond, string name> {
+ def _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index ea8937c..5e7663c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -24,5 +24,136 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-instr-info"
+#define GET_INSTRINFO_CTOR_DTOR
+#include "WebAssemblyGenInstrInfo.inc"
+
WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI)
- : RI(STI.getTargetTriple()) {}
+ : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN,
+ WebAssembly::ADJCALLSTACKUP),
+ RI(STI.getTargetTriple()) {}
+
+void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) const {
+  // This method is called by post-RA expansion, which expects only physical
+  // registers to exist. However, we need to handle both physical and virtual
+  // registers here.
+ auto &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+      TargetRegisterInfo::isVirtualRegister(DestReg)
+          ? MRI.getRegClass(DestReg)
+          : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(SrcReg);
+
+ unsigned CopyLocalOpcode;
+ if (RC == &WebAssembly::I32RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_I32;
+ else if (RC == &WebAssembly::I64RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_I64;
+ else if (RC == &WebAssembly::F32RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_F32;
+ else if (RC == &WebAssembly::F64RegClass)
+ CopyLocalOpcode = WebAssembly::COPY_LOCAL_F64;
+ else
+ llvm_unreachable("Unexpected register class");
+
+ BuildMI(MBB, I, DL, get(CopyLocalOpcode), DestReg)
+ .addReg(SrcReg, KillSrc ? RegState::Kill : 0);
+}
+
+// Branch analysis.
+bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool /*AllowModify*/) const {
+ bool HaveCond = false;
+ for (MachineInstr &MI : MBB.terminators()) {
+ switch (MI.getOpcode()) {
+ default:
+ // Unhandled instruction; bail out.
+ return true;
+ case WebAssembly::BR_IF:
+ if (HaveCond)
+ return true;
+ Cond.push_back(MachineOperand::CreateImm(true));
+ Cond.push_back(MI.getOperand(0));
+ TBB = MI.getOperand(1).getMBB();
+ HaveCond = true;
+ break;
+ case WebAssembly::BR_UNLESS:
+ if (HaveCond)
+ return true;
+ Cond.push_back(MachineOperand::CreateImm(false));
+ Cond.push_back(MI.getOperand(0));
+ TBB = MI.getOperand(1).getMBB();
+ HaveCond = true;
+ break;
+ case WebAssembly::BR:
+ if (!HaveCond)
+ TBB = MI.getOperand(0).getMBB();
+ else
+ FBB = MI.getOperand(0).getMBB();
+ break;
+ }
+ if (MI.isBarrier())
+ break;
+ }
+
+ return false;
+}
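
To make the TBB/FBB/Cond convention above concrete: a block ending in "BR_IF %c, bb.1" followed by "BR bb.2" yields Cond = {imm 1, %c}, TBB = bb.1, FBB = bb.2, while BR_UNLESS records imm 0 instead. A standalone sketch of that classification over a simplified terminator list (plain C++ with hypothetical block names, not the MachineInstr API):

    #include <cassert>
    #include <string>
    #include <vector>

    enum class Term { BR, BR_IF, BR_UNLESS };
    struct TermInstr { Term Op; std::string Cond; std::string Dest; };

    struct BranchInfo {
      bool CondSense = false;     // true for BR_IF, false for BR_UNLESS
      std::string Cond, TBB, FBB; // empty string == not set
    };

    // Mirrors the loop above: at most one conditional branch, optionally
    // followed by an unconditional one; returns false on success, like the hook.
    static bool analyze(const std::vector<TermInstr> &Terms, BranchInfo &BI) {
      bool HaveCond = false;
      for (const TermInstr &T : Terms) {
        switch (T.Op) {
        case Term::BR_IF:
        case Term::BR_UNLESS:
          if (HaveCond)
            return true; // a second conditional branch isn't modeled
          BI.CondSense = (T.Op == Term::BR_IF);
          BI.Cond = T.Cond;
          BI.TBB = T.Dest;
          HaveCond = true;
          break;
        case Term::BR:
          (HaveCond ? BI.FBB : BI.TBB) = T.Dest;
          break;
        }
      }
      return false;
    }

    int main() {
      BranchInfo BI;
      bool Failed = analyze({{Term::BR_IF, "%c", "bb.1"}, {Term::BR, "", "bb.2"}}, BI);
      assert(!Failed && BI.CondSense && BI.Cond == "%c");
      assert(BI.TBB == "bb.1" && BI.FBB == "bb.2");
      return 0;
    }
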
+
+unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::instr_iterator I = MBB.instr_end();
+ unsigned Count = 0;
+
+ while (I != MBB.instr_begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (!I->isTerminator())
+ break;
+ // Remove the branch.
+ I->eraseFromParent();
+ I = MBB.instr_end();
+ ++Count;
+ }
+
+ return Count;
+}
+
+unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const {
+ if (Cond.empty()) {
+ if (!TBB)
+ return 0;
+
+ BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(TBB);
+ return 1;
+ }
+
+ assert(Cond.size() == 2 && "Expected a flag and a successor block");
+
+ if (Cond[0].getImm()) {
+ BuildMI(&MBB, DL, get(WebAssembly::BR_IF))
+ .addOperand(Cond[1])
+ .addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS))
+ .addOperand(Cond[1])
+ .addMBB(TBB);
+ }
+ if (!FBB)
+ return 1;
+
+ BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(FBB);
+ return 2;
+}
+
+bool WebAssemblyInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Expected a flag and a successor block");
+ Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index 1c4ae22..5ddd9b3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -19,17 +19,35 @@
#include "WebAssemblyRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "WebAssemblyGenInstrInfo.inc"
+
namespace llvm {
class WebAssemblySubtarget;
-class WebAssemblyInstrInfo final {
+class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo {
const WebAssemblyRegisterInfo RI;
public:
explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI);
const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; }
+
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
+ DebugLoc DL) const override;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index fe3ca76..f0b4ce7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -25,20 +25,48 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
// WebAssembly-specific DAG Node Types.
//===----------------------------------------------------------------------===//
+def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>;
+def SDT_WebAssemblyCallSeqEnd :
+ SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>;
+def SDT_WebAssemblyTableswitch : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>;
+def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
+def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific DAG Nodes.
//===----------------------------------------------------------------------===//
+def WebAssemblycallseq_start :
+ SDNode<"ISD::CALLSEQ_START", SDT_WebAssemblyCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def WebAssemblycallseq_end :
+ SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0",
+ SDT_WebAssemblyCall0,
+ [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1",
+ SDT_WebAssemblyCall1,
+ [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblytableswitch : SDNode<"WebAssemblyISD::TABLESWITCH",
+ SDT_WebAssemblyTableswitch,
+ [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT",
+ SDT_WebAssemblyArgument>;
+def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
+ SDT_WebAssemblyReturn, [SDNPHasChain]>;
+def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
+ SDT_WebAssemblyWrapper>;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * get_local: read the current value of a local variable
- * set_local: set the current value of a local variable
-*/
+def bb_op : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
@@ -47,13 +75,86 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
include "WebAssemblyInstrFormats.td"
//===----------------------------------------------------------------------===//
+// Additional instructions.
+//===----------------------------------------------------------------------===//
+
+multiclass ARGUMENT<WebAssemblyRegClass vt> {
+ let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
+ def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
+ [(set vt:$res, (WebAssemblyargument timm:$argno))]>;
+}
+defm : ARGUMENT<I32>;
+defm : ARGUMENT<I64>;
+defm : ARGUMENT<F32>;
+defm : ARGUMENT<F64>;
+
+let Defs = [ARGUMENTS] in {
+
+// get_local and set_local are not generated by instruction selection; they
+// are implied by virtual register uses and defs in most contexts. However,
+// they are explicitly emitted for special purposes.
+multiclass LOCAL<WebAssemblyRegClass vt> {
+ def GET_LOCAL_#vt : I<(outs vt:$res), (ins i32imm:$regno), [],
+ "get_local\t$res, $regno">;
+ // TODO: set_local returns its operand value
+ def SET_LOCAL_#vt : I<(outs), (ins i32imm:$regno, vt:$src), [],
+ "set_local\t$regno, $src">;
+
+ // COPY_LOCAL is not an actual instruction in wasm, but since we allow
+  // get_local and set_local to be implicit, COPY_LOCAL is effectively a
+  // no-op: all the work is done by the implied get_local and set_local.
+ let isAsCheapAsAMove = 1 in
+ def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [],
+ "copy_local\t$res, $src">;
+}
+defm : LOCAL<I32>;
+defm : LOCAL<I64>;
+defm : LOCAL<F32>;
+defm : LOCAL<F64>;
+
+let isMoveImm = 1 in {
+def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm),
+ [(set I32:$res, imm:$imm)],
+ "i32.const\t$res, $imm">;
+def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm),
+ [(set I64:$res, imm:$imm)],
+ "i64.const\t$res, $imm">;
+def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm),
+ [(set F32:$res, fpimm:$imm)],
+ "f32.const\t$res, $imm">;
+def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm),
+ [(set F64:$res, fpimm:$imm)],
+ "f64.const\t$res, $imm">;
+} // isMoveImm = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)),
+ (CONST_I32 tglobaladdr:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)),
+ (CONST_I32 texternalsym:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)),
+ (CONST_I32 tjumptable:$dst)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Function signature and local variable declaration "instructions".
+def PARAM : I<(outs), (ins variable_ops), [], ".param \t">;
+def RESULT : I<(outs), (ins variable_ops), [], ".result \t">;
+def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">;
+
+} // Defs = [ARGUMENTS]
+
+//===----------------------------------------------------------------------===//
// Additional sets of instructions.
//===----------------------------------------------------------------------===//
include "WebAssemblyInstrMemory.td"
include "WebAssemblyInstrCall.td"
+include "WebAssemblyInstrControl.td"
include "WebAssemblyInstrInteger.td"
-include "WebAssemblyInstrFloat.td"
include "WebAssemblyInstrConv.td"
+include "WebAssemblyInstrFloat.td"
include "WebAssemblyInstrAtomics.td"
include "WebAssemblyInstrSIMD.td"
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 5f60fe8..09e5eaf 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -12,34 +12,77 @@
///
//===----------------------------------------------------------------------===//
-defm ADD : BinaryInt<add>;
-defm SUB : BinaryInt<sub>;
-defm MUL : BinaryInt<mul>;
-defm SDIV : BinaryInt<sdiv>;
-defm UDIV : BinaryInt<udiv>;
-defm SREM : BinaryInt<srem>;
-defm UREM : BinaryInt<urem>;
-defm AND : BinaryInt<and>;
-defm IOR : BinaryInt<or>;
-defm XOR : BinaryInt<xor>;
-defm SHL : BinaryInt<shl>;
-defm SHR : BinaryInt<srl>;
-defm SAR : BinaryInt<sra>;
-
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * int32.eq: signed-less compare equal
- * int32.slt: signed less than
- * int32.sle: signed less than or equal
- * int32.ult: unsigned less than
- * int32.ule: unsigned less than or equal
- * int32.sgt: signed greater than
- * int32.sge: signed greater than or equal
- * int32.ugt: unsigned greater than
- * int32.uge: unsigned greater than or equal
- */
-
-defm CLZ : UnaryInt<ctlz>;
-defm CTZ : UnaryInt<cttz>;
-defm POPCNT : UnaryInt<ctpop>;
+let Defs = [ARGUMENTS] in {
+
+// The spaces after the names are for aesthetic purposes only, to make
+// operands line up vertically after tab expansion.
+let isCommutable = 1 in
+defm ADD : BinaryInt<add, "add ">;
+defm SUB : BinaryInt<sub, "sub ">;
+let isCommutable = 1 in
+defm MUL : BinaryInt<mul, "mul ">;
+// Division and remainder trap on a zero denominator; signed division also
+// traps on overflow (INT_MIN / -1).
+let hasSideEffects = 1 in {
+defm DIV_S : BinaryInt<sdiv, "div_s">;
+defm DIV_U : BinaryInt<udiv, "div_u">;
+defm REM_S : BinaryInt<srem, "rem_s">;
+defm REM_U : BinaryInt<urem, "rem_u">;
+} // hasSideEffects = 1
+let isCommutable = 1 in {
+defm AND : BinaryInt<and, "and ">;
+defm OR : BinaryInt<or, "or ">;
+defm XOR : BinaryInt<xor, "xor ">;
+} // isCommutable = 1
+defm SHL : BinaryInt<shl, "shl ">;
+defm SHR_U : BinaryInt<srl, "shr_u">;
+defm SHR_S : BinaryInt<sra, "shr_s">;
+
+let isCommutable = 1 in {
+defm EQ : ComparisonInt<SETEQ, "eq ">;
+defm NE : ComparisonInt<SETNE, "ne ">;
+} // isCommutable = 1
+defm LT_S : ComparisonInt<SETLT, "lt_s">;
+defm LE_S : ComparisonInt<SETLE, "le_s">;
+defm LT_U : ComparisonInt<SETULT, "lt_u">;
+defm LE_U : ComparisonInt<SETULE, "le_u">;
+defm GT_S : ComparisonInt<SETGT, "gt_s">;
+defm GE_S : ComparisonInt<SETGE, "ge_s">;
+defm GT_U : ComparisonInt<SETUGT, "gt_u">;
+defm GE_U : ComparisonInt<SETUGE, "ge_u">;
+
+defm CLZ : UnaryInt<ctlz, "clz ">;
+defm CTZ : UnaryInt<cttz, "ctz ">;
+defm POPCNT : UnaryInt<ctpop, "popcnt">;
+
+} // Defs = [ARGUMENTS]
+
+// Expand the "don't care" operations to supported operations.
+def : Pat<(ctlz_zero_undef I32:$src), (CLZ_I32 I32:$src)>;
+def : Pat<(ctlz_zero_undef I64:$src), (CLZ_I64 I64:$src)>;
+def : Pat<(cttz_zero_undef I32:$src), (CTZ_I32 I32:$src)>;
+def : Pat<(cttz_zero_undef I64:$src), (CTZ_I64 I64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_I32 : I<(outs I32:$dst), (ins I32:$cond, I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
+ "i32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_I64 : I<(outs I64:$dst), (ins I32:$cond, I64:$lhs, I64:$rhs),
+ [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
+ "i64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), I32:$lhs, I32:$rhs),
+ (SELECT_I32 I32:$cond, I32:$lhs, I32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), I64:$lhs, I64:$rhs),
+ (SELECT_I64 I32:$cond, I64:$lhs, I64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
+ (SELECT_I32 I32:$cond, I32:$rhs, I32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
+ (SELECT_I64 I32:$cond, I64:$rhs, I64:$lhs)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 5ab40e8..74ec45d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -12,35 +12,500 @@
///
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- * Each has optional alignment and immediate byte offset.
- *
- * int32.load_sx[int8]: sign-extend to int32
- * int32.load_sx[int16]: sign-extend to int32
- * int32.load_zx[int8]: zero-extend to int32
- * int32.load_zx[int16]: zero-extend to int32
- * int32.load[int32]: (no conversion)
- * int64.load_sx[int8]: sign-extend to int64
- * int64.load_sx[int16]: sign-extend to int64
- * int64.load_sx[int32]: sign-extend to int64
- * int64.load_zx[int8]: zero-extend to int64
- * int64.load_zx[int16]: zero-extend to int64
- * int64.load_zx[int32]: zero-extend to int64
- * int64.load[int64]: (no conversion)
- * float32.load[float32]: (no conversion)
- * float64.load[float64]: (no conversion)
- *
- * int32.store[int8]: wrap int32 to int8
- * int32.store[int16]: wrap int32 to int16
- * int32.store[int32]: (no conversion)
- * int64.store[int8]: wrap int64 to int8
- * int64.store[int16]: wrap int64 to int16
- * int64.store[int32]: wrap int64 to int32
- * int64.store[int64]: (no conversion)
- * float32.store[float32]: (no conversion)
- * float64.store[float64]: (no conversion)
- *
- * load_global: load the value of a given global variable
- * store_global: store a given value to a given global variable
- */
+// TODO:
+// - HasAddr64
+// - WebAssemblyTargetLowering having to do with atomics
+// - Each has optional alignment.
+
+// WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16
+// local types. These memory-only types instead zero- or sign-extend into local
+// types when loading, and truncate when storing.
+
+// WebAssembly constant offsets are performed as unsigned with infinite
+// precision, so we need to check for NoUnsignedWrap so that we don't fold an
+// offset for an add that needs wrapping.
+def regPlusImm : PatFrag<(ops node:$off, node:$addr),
+ (add node:$addr, node:$off),
+ [{ return N->getFlags()->hasNoUnsignedWrap(); }]>;
+
+let Defs = [ARGUMENTS] in {
+
+// Basic load.
+def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load\t$dst, ${off}(${addr})">;
+def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load\t$dst, ${off}(${addr})">;
+def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "f32.load\t$dst, ${off}(${addr})">;
+def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "f64.load\t$dst, ${off}(${addr})">;
+
+} // Defs = [ARGUMENTS]
+
+// Select loads with no constant offset.
+def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>;
+def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>;
+def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>;
+def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>;
+
+// Select loads with a constant offset.
+def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_I32 imm:$off, $addr)>;
+def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_I64 imm:$off, $addr)>;
+def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_F32 imm:$off, $addr)>;
+def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))),
+ (LOAD_F64 imm:$off, $addr)>;
+def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_F32 tglobaladdr:$off, $addr)>;
+def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD_F64 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_I32 texternalsym:$off, $addr)>;
+def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_I64 texternalsym:$off, $addr)>;
+def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_F32 texternalsym:$off, $addr)>;
+def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD_F64 texternalsym:$off, $addr)>;
+
+// Select loads with just a constant offset.
+def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_F32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))),
+ (LOAD_F64 texternalsym:$off, (CONST_I32 0))>;
+
+let Defs = [ARGUMENTS] in {
+
+// Extending load.
+def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load8_s\t$dst, ${off}(${addr})">;
+def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load8_u\t$dst, ${off}(${addr})">;
+def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load16_s\t$dst, ${off}(${addr})">;
+def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i32.load16_u\t$dst, ${off}(${addr})">;
+def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load8_s\t$dst, ${off}(${addr})">;
+def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load8_u\t$dst, ${off}(${addr})">;
+def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load16_s\t$dst, ${off}(${addr})">;
+def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load16_u\t$dst, ${off}(${addr})">;
+def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load32_s\t$dst, ${off}(${addr})">;
+def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [],
+ "i64.load32_u\t$dst, ${off}(${addr})">;
+
+} // Defs = [ARGUMENTS]
+
+// Select extending loads with no constant offset.
+def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>;
+def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>;
+def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>;
+def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>;
+def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>;
+def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>;
+def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>;
+def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
+def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>;
+def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
+
+// Select extending loads with a constant offset.
+def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_S_I32 imm:$off, $addr)>;
+def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I32 imm:$off, $addr)>;
+def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_S_I32 imm:$off, $addr)>;
+def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I32 imm:$off, $addr)>;
+def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_S_I64 imm:$off, $addr)>;
+def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_S_I64 imm:$off, $addr)>;
+def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD32_S_I64 imm:$off, $addr)>;
+def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD32_U_I64 imm:$off, $addr)>;
+def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_S_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_S_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_S_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_S_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD32_S_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD32_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_S_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_S_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_S_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_S_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD32_S_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD32_U_I64 texternalsym:$off, $addr)>;
+
+// Select extending loads with just a constant offset.
+def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+
+// Resolve "don't care" extending loads to zero-extending loads. This is
+// somewhat arbitrary, but zero-extending is conceptually simpler.
+
+// Select "don't care" extending loads with no constant offset.
+def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>;
+def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>;
+def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>;
+def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
+def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
+
+// Select "don't care" extending loads with a constant offset.
+def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I32 imm:$off, $addr)>;
+def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I32 imm:$off, $addr)>;
+def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD8_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD16_U_I64 imm:$off, $addr)>;
+def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))),
+ (LOAD32_U_I64 imm:$off, $addr)>;
+def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I32 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD8_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD16_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+ (LOAD32_U_I64 tglobaladdr:$off, $addr)>;
+def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I32 texternalsym:$off, $addr)>;
+def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD8_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD16_U_I64 texternalsym:$off, $addr)>;
+def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+ (LOAD32_U_I64 texternalsym:$off, $addr)>;
+
+// Select "don't care" extending loads with just a constant offset.
+def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+ (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
+ (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+
+let Defs = [ARGUMENTS] in {
+
+// Basic store.
+// Note that we split the patterns out of the instruction definitions because
+// WebAssembly's stores return their operand value, and tablegen doesn't like
+// instruction definition patterns that don't reference all of the output
+// operands.
+// Note: WebAssembly inverts SelectionDAG's usual operand order.
+def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+ "i32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [],
+ "f32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [],
+ "f64.store\t$dst, ${off}(${addr}), $val">;
+
+} // Defs = [ARGUMENTS]
+
+// Select stores with no constant offset.
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
+
+// Select stores with a constant offset.
+def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_I32 imm:$off, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_F32 imm:$off, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE_F64 imm:$off, I32:$addr, F64:$val)>;
+def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>;
+def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>;
+
+// Select stores with just a constant offset.
+def : Pat<(store I32:$val, imm:$off),
+ (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(store I64:$val, imm:$off),
+ (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(store F32:$val, imm:$off),
+ (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>;
+def : Pat<(store F64:$val, imm:$off),
+ (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>;
+def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>;
+def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>;
+def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>;
+def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Truncating store.
+def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+ "i32.store8\t$dst, ${off}(${addr}), $val">;
+def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+ "i32.store16\t$dst, ${off}(${addr}), $val">;
+def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store8\t$dst, ${off}(${addr}), $val">;
+def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store16\t$dst, ${off}(${addr}), $val">;
+def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+ "i64.store32\t$dst, ${off}(${addr}), $val">;
+
+} // Defs = [ARGUMENTS]
+
+// Select truncating stores with no constant offset.
+def : Pat<(truncstorei8 I32:$val, I32:$addr),
+ (STORE8_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, I32:$addr),
+ (STORE16_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, I32:$addr),
+ (STORE8_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, I32:$addr),
+ (STORE16_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, I32:$addr),
+ (STORE32_I64 0, I32:$addr, I64:$val)>;
+
+// Select truncating stores with a constant offset.
+def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE8_I32 imm:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE16_I32 imm:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE8_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE16_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+ (STORE32_I64 imm:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+ (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+ (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>;
+
+// Select truncating stores with just a constant offset.
+def : Pat<(truncstorei8 I32:$val, imm:$off),
+ (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, imm:$off),
+ (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, imm:$off),
+ (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, imm:$off),
+ (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, imm:$off),
+ (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>;
+def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)),
+ (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Memory size.
+def MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins),
+ [(set I32:$dst, (int_wasm_memory_size))],
+ "memory_size\t$dst">,
+ Requires<[HasAddr32]>;
+def MEMORY_SIZE_I64 : I<(outs I64:$dst), (ins),
+ [(set I64:$dst, (int_wasm_memory_size))],
+ "memory_size\t$dst">,
+ Requires<[HasAddr64]>;
+
+// Grow memory.
+def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta),
+ [(int_wasm_grow_memory I32:$delta)],
+ "grow_memory\t$delta">,
+ Requires<[HasAddr32]>;
+def GROW_MEMORY_I64 : I<(outs), (ins I64:$delta),
+ [(int_wasm_grow_memory I64:$delta)],
+ "grow_memory\t$delta">,
+ Requires<[HasAddr64]>;
+
+} // Defs = [ARGUMENTS]
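
The regPlusImm fragment above only folds adds that carry the NoUnsignedWrap flag, because WebAssembly computes the effective address without wrapping while an i32 add in the IR wraps modulo 2^32. A small host-side sketch of the discrepancy (illustration only; the constants are arbitrary):

// If the add wraps, "load off(addr)" and "load 0(addr+off)" name different
// addresses, so the fold would be wrong without the NoUnsignedWrap check.
#include <cstdint>
#include <iostream>

int main() {
  uint32_t addr = 0xFFFFFFF0u;
  uint32_t off = 0x20u;
  // What the wrapping i32 add in the IR computes.
  uint32_t wrapped = addr + off;                       // 0x00000010
  // What a folded "load off(addr)" would conceptually access.
  uint64_t unwrapped = uint64_t(addr) + uint64_t(off); // 0x100000010
  std::cout << std::hex << wrapped << " vs " << unwrapped << '\n';
  return 0;
}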
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
new file mode 100644
index 0000000..b009a4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -0,0 +1,133 @@
+//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file lowers br_unless into br_if with an inverted condition.
+///
+/// br_unless is not currently in the spec, but it's very convenient for LLVM
+/// to use. This pass allows LLVM to use it, for now.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-br_unless"
+
+namespace {
+class WebAssemblyLowerBrUnless final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly Lower br_unless";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyLowerBrUnless::ID = 0;
+FunctionPass *llvm::createWebAssemblyLowerBrUnless() {
+ return new WebAssemblyLowerBrUnless();
+}
+
+bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Lowering br_unless **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ auto &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ auto &MRI = MF.getRegInfo();
+
+ for (auto &MBB : MF) {
+ for (auto MII = MBB.begin(); MII != MBB.end(); ) {
+ MachineInstr *MI = &*MII++;
+ if (MI->getOpcode() != WebAssembly::BR_UNLESS)
+ continue;
+
+ unsigned Cond = MI->getOperand(0).getReg();
+ bool Inverted = false;
+
+ // Attempt to invert the condition in place.
+ if (MFI.isVRegStackified(Cond)) {
+ assert(MRI.hasOneDef(Cond));
+ MachineInstr *Def = MRI.getVRegDef(Cond);
+ switch (Def->getOpcode()) {
+ using namespace WebAssembly;
+ case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break;
+ case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break;
+ case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break;
+ case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break;
+ case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break;
+ case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break;
+ case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break;
+ case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break;
+ case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break;
+ case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break;
+ case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break;
+ case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break;
+ case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break;
+ case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break;
+ case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break;
+ case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break;
+ case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break;
+ case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break;
+ case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break;
+ case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break;
+ case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break;
+ case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break;
+ case EQ_F64: Def->setDesc(TII.get(NE_F64)); Inverted = true; break;
+ case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break;
+ default: break;
+ }
+ }
+
+ // If we weren't able to invert the condition in place, insert an
+ // expression to invert it.
+ if (!Inverted) {
+ unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ MFI.stackifyVReg(ZeroReg);
+ BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg)
+ .addImm(0);
+ unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ MFI.stackifyVReg(Tmp);
+ BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp)
+ .addReg(Cond)
+ .addReg(ZeroReg);
+ Cond = Tmp;
+ Inverted = true;
+ }
+
+ // The br_unless condition has now been inverted. Insert a br_if and
+ // delete the br_unless.
+ assert(Inverted);
+ BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
+ .addReg(Cond)
+ .addOperand(MI->getOperand(1));
+ MBB.erase(MI);
+ }
+ }
+
+ return true;
+}
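
For reference, the fallback path above depends on comparing an i32 condition with zero inverting its truth value. A standalone sketch of that invariant (illustration only; invertByCompareWithZero mirrors the inserted CONST_I32/EQ_I32 pair):

#include <cassert>
#include <cstdint>

// Mirrors the EQ_I32-with-zero sequence the pass emits when the defining
// comparison cannot simply be flipped.
static int32_t invertByCompareWithZero(int32_t cond) {
  return cond == 0;
}

int main() {
  // br_unless(cond) must behave like br_if(!cond) for every i32 value.
  for (int32_t cond : {0, 1, 2, -1}) {
    bool brUnlessTaken = (cond == 0);
    bool brIfOnInverted = (invertByCompareWithZero(cond) != 0);
    assert(brUnlessTaken == brIfOnInverted);
  }
  return 0;
}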
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
new file mode 100644
index 0000000..a953f82
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -0,0 +1,106 @@
+// WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst //
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains code to lower WebAssembly MachineInstrs to their
+/// corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMCInstLower.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSymbol *
+WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ return Printer.getSymbol(MO.getGlobal());
+}
+
+MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
+ const MachineOperand &MO) const {
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags");
+
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
+
+ int64_t Offset = MO.getOffset();
+ if (Offset != 0) {
+ assert(!MO.isJTI() && "Unexpected offset with jump table index");
+ Expr =
+ MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
+ }
+
+ return MCOperand::createExpr(Expr);
+}
+
+void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
+ MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register: {
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ continue;
+ const WebAssemblyFunctionInfo &MFI =
+ *MI->getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>();
+ unsigned WAReg = MFI.getWAReg(MO.getReg());
+ MCOp = MCOperand::createReg(WAReg);
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ // TODO: MC converts all floating point immediate operands to double.
+ // This is fine for numeric values, but may cause NaNs to change bits.
+ const ConstantFP *Imm = MO.getFPImm();
+ if (Imm->getType()->isFloatTy())
+ MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToFloat());
+ else if (Imm->getType()->isDoubleTy())
+ MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToDouble());
+ else
+ llvm_unreachable("unknown floating point immediate type");
+ break;
+ }
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
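
The MO_FPImmediate TODO above notes that MC stores every FP immediate as a double. A host-only sketch of the concern (illustration, not from the patch): ordinary f32 values round-trip exactly through double, but NaN encodings are not guaranteed to, since a signaling NaN may be quieted along the way.

#include <cstdint>
#include <cstring>
#include <iostream>

static uint32_t bitsOf(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}

static float fromBits(uint32_t u) {
  float f;
  std::memcpy(&f, &u, sizeof(f));
  return f;
}

int main() {
  // 0x3F800000 is 1.0f, 0x7FC00001 a quiet NaN, 0x7F800001 a signaling NaN.
  for (uint32_t u : {0x3F800000u, 0x7FC00001u, 0x7F800001u}) {
    float f = fromBits(u);
    double d = static_cast<double>(f);  // what MC effectively stores
    float back = static_cast<float>(d); // what gets re-materialized
    std::cout << std::hex << u << " -> " << bitsOf(back)
              << (u == bitsOf(back) ? " (preserved)\n" : " (changed)\n");
  }
  return 0;
}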
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
new file mode 100644
index 0000000..6d70470
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -0,0 +1,45 @@
+//===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares the class to lower WebAssembly MachineInstrs to
+/// their corresponding MCInst records.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+class MCSymbol;
+class MachineInstr;
+class MachineOperand;
+
+/// This class is used to lower a MachineInstr into an MCInst.
+class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
+ MCContext &Ctx;
+ AsmPrinter &Printer;
+
+ MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+ MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+
+public:
+ WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer) {}
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 542d984..225c5d3 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -17,3 +17,9 @@
using namespace llvm;
WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {}
+
+void WebAssemblyFunctionInfo::initWARegs() {
+ assert(WARegs.empty());
+ unsigned Reg = UnusedReg;
+ WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index fc5e910..6a60280 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*-
+// WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*-
//
// The LLVM Compiler Infrastructure
//
@@ -16,8 +16,7 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H
-#include "WebAssemblyRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -27,9 +26,70 @@ namespace llvm {
class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
MachineFunction &MF;
+ std::vector<MVT> Params;
+
+ /// A mapping from CodeGen vreg index to WebAssembly register number.
+ std::vector<unsigned> WARegs;
+
+ /// A mapping from CodeGen vreg index to a boolean value indicating whether
+ /// the given register is considered to be "stackified", meaning it has been
+ /// determined or made to meet the stack requirements:
+ /// - single use (per path)
+ /// - single def (per path)
+ /// - defined and used in LIFO order with other stack registers
+ BitVector VRegStackified;
+
+ // One entry for each possible target register. We expect it to be small.
+ std::vector<unsigned> PhysRegs;
+
public:
- explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+ explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {
+ PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U);
+ }
~WebAssemblyFunctionInfo() override;
+
+ void addParam(MVT VT) { Params.push_back(VT); }
+ const std::vector<MVT> &getParams() const { return Params; }
+
+ static const unsigned UnusedReg = -1u;
+
+ void stackifyVReg(unsigned VReg) {
+ if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+ VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
+ VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
+ }
+ bool isVRegStackified(unsigned VReg) const {
+ if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+ return false;
+ return VRegStackified.test(TargetRegisterInfo::virtReg2Index(VReg));
+ }
+
+ void initWARegs();
+ void setWAReg(unsigned VReg, unsigned WAReg) {
+ assert(WAReg != UnusedReg);
+ assert(TargetRegisterInfo::virtReg2Index(VReg) < WARegs.size());
+ WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg;
+ }
+ unsigned getWAReg(unsigned Reg) const {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
+ return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
+ }
+ return PhysRegs[Reg];
+ }
+ // If new virtual registers are created after initWARegs has been called,
+ // this function can be used to add WebAssembly register mappings for them.
+ void addWAReg(unsigned VReg, unsigned WAReg) {
+ assert(TargetRegisterInfo::virtReg2Index(VReg) == WARegs.size());
+ WARegs.push_back(WAReg);
+ }
+
+ void addPReg(unsigned PReg, unsigned WAReg) {
+ assert(PReg < WebAssembly::NUM_TARGET_REGS);
+ assert(WAReg < -1U);
+ PhysRegs[PReg] = WAReg;
+ }
+ const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; }
};
} // end namespace llvm
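
As a standalone model (assumption-level, simplified from the class above) of the WAReg bookkeeping: every CodeGen vreg starts out mapped to an UnusedReg sentinel and is later assigned a dense WebAssembly register number.

#include <cassert>
#include <vector>

namespace {
constexpr unsigned UnusedReg = -1u;

struct WARegMap {
  std::vector<unsigned> WARegs;

  void init(unsigned NumVRegs) {
    assert(WARegs.empty());
    WARegs.resize(NumVRegs, UnusedReg); // mirrors initWARegs()
  }
  void set(unsigned VRegIndex, unsigned WAReg) {
    assert(WAReg != UnusedReg && VRegIndex < WARegs.size());
    WARegs[VRegIndex] = WAReg;          // mirrors setWAReg()
  }
  unsigned get(unsigned VRegIndex) const { return WARegs[VRegIndex]; }
};
} // namespace

int main() {
  WARegMap Map;
  Map.init(3);   // e.g. three virtual registers in the function
  Map.set(0, 0); // vreg 0 -> wasm register 0
  Map.set(2, 1); // vreg 2 -> wasm register 1; vreg 1 stays unused
  assert(Map.get(1) == UnusedReg);
  return 0;
}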
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
new file mode 100644
index 0000000..4dc401a
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -0,0 +1,76 @@
+//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize calls with "returned" attributes for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-returned"
+
+namespace {
+class OptimizeReturned final : public FunctionPass,
+ public InstVisitor<OptimizeReturned> {
+ const char *getPassName() const override {
+ return "WebAssembly Optimize Returned";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ DominatorTree *DT;
+
+public:
+ static char ID;
+ OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+
+ void visitCallSite(CallSite CS);
+};
+} // End anonymous namespace
+
+char OptimizeReturned::ID = 0;
+FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
+ return new OptimizeReturned();
+}
+
+void OptimizeReturned::visitCallSite(CallSite CS) {
+ for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
+ if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
+ Instruction *Inst = CS.getInstruction();
+ Value *Arg = CS.getArgOperand(i);
+ // Ignore constants, globals, undef, etc.
+ if (isa<Constant>(Arg))
+ continue;
+ // Like replaceDominatedUsesWith but using Instruction/Use dominance.
+ for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
+ Use &U = *UI++;
+ if (DT->dominates(Inst, U))
+ U.set(Inst);
+ }
+ }
+}
+
+bool OptimizeReturned::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ visit(F);
+ return true;
+}
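
A source-level illustration (an assumption-style example, not from the patch) of what OptimizeReturned enables: std::strcpy returns its destination argument, and a frontend or attribute-inference pass may mark that argument "returned". Uses of the argument that are dominated by the call can then be rewritten to use the call's result, which on WebAssembly is already available as the call's return value.

#include <cstring>
#include <iostream>

int main() {
  char buf[16];
  char *ret = std::strcpy(buf, "hi");
  // After the pass, a use like the one below (dominated by the call) can
  // refer to `ret` rather than `buf`; both name the same storage.
  std::cout << ret << '\n';
  return 0;
}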
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp
new file mode 100644
index 0000000..d570d42
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPEI.cpp
@@ -0,0 +1,1066 @@
+//===-- WebAssemblyPEI.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This is a copy of lib/CodeGen/PrologEpilogInserter.cpp except that it does
+// not assert that all virtual registers are gone (because WebAssembly currently
+// uses virtual rather than physical registers), and only runs
+// MRI.clearVirtRegs() if scavenging happened (which it never does). It also
+// uses a different class name so it can be registered via INITIALIZE_PASS.
+// It is otherwise unmodified, so any changes to the target-independent PEI
+// can be easily applied.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pei"
+namespace llvm {
+void initializeWasmPEIPass(PassRegistry&);
+}
+namespace {
+class WasmPEI : public MachineFunctionPass {
+public:
+ static char ID;
+ WasmPEI() : MachineFunctionPass(ID) {
+ initializeWasmPEIPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+ /// frame indexes with appropriate references.
+ ///
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+ RegScavenger *RS;
+
+ // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+ // stack frame indexes.
+ unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+ // Save and Restore blocks of the current function. Typically there is a
+ // single save block, unless Windows EH funclets are involved.
+ SmallVector<MachineBasicBlock *, 1> SaveBlocks;
+ SmallVector<MachineBasicBlock *, 4> RestoreBlocks;
+
+ // Flag to control whether to use the register scavenger to resolve
+ // frame index materialization registers. Set according to
+ // TRI->requiresFrameIndexScavenging() for the current function.
+ bool FrameIndexVirtualScavenging;
+
+ void calculateSets(MachineFunction &Fn);
+ void calculateCallsInformation(MachineFunction &Fn);
+ void assignCalleeSavedSpillSlots(MachineFunction &Fn,
+ const BitVector &SavedRegs);
+ void insertCSRSpillsAndRestores(MachineFunction &Fn);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ void replaceFrameIndices(MachineFunction &Fn);
+ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj);
+ void scavengeFrameVirtualRegs(MachineFunction &Fn);
+ void insertPrologEpilogCode(MachineFunction &Fn);
+};
+} // namespace
+
+char WasmPEI::ID = 0;
+
+namespace llvm {
+FunctionPass *createWebAssemblyPEI() {
+ return new WasmPEI();
+}
+}
+
+static cl::opt<unsigned>
+WarnStackSize("wasm-warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
+ cl::desc("Warn for stack size bigger than the given"
+ " number"));
+
+INITIALIZE_PASS_BEGIN(WasmPEI, "wasmprologepilog",
+ "Wasm Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(WasmPEI, "wasmprologepilog",
+ "Wasm Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+ "Number of bytes used for stack in all functions");
+
+void WasmPEI::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<StackProtector>();
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Compute the set of return blocks
+void WasmPEI::calculateSets(MachineFunction &Fn) {
+ const MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Even when we do not change any CSR, we still want to insert the
+ // prologue and epilogue of the function.
+ // So set the save points for those.
+
+ // Use the points found by shrink-wrapping, if any.
+ if (MFI->getSavePoint()) {
+ SaveBlocks.push_back(MFI->getSavePoint());
+ assert(MFI->getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ return;
+ }
+
+ // Save refs to entry and return blocks.
+ SaveBlocks.push_back(&Fn.front());
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.isEHFuncletEntry())
+ SaveBlocks.push_back(&MBB);
+ if (MBB.isReturnBlock())
+ RestoreBlocks.push_back(&MBB);
+ }
+}
+
+/// StackObjSet - A set of stack object indexes
+typedef SmallSetVector<int, 8> StackObjSet;
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool WasmPEI::runOnMachineFunction(MachineFunction &Fn) {
+ const Function* F = Fn.getFunction();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+
+ // LOCALMOD: assert removed from target-independent PEI
+ //assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
+ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
+ calculateCallsInformation(Fn);
+
+ // Determine which of the registers in the callee save list should be saved.
+ BitVector SavedRegs;
+ TFI->determineCalleeSaves(Fn, SavedRegs, RS);
+
+ // Insert spill code for any callee saved registers that are modified.
+ assignCalleeSavedSpillSlots(Fn, SavedRegs);
+
+ // Determine placement of CSR spill/restore code:
+ // place all spills in the entry block, all restores in return blocks.
+ calculateSets(Fn);
+
+ // Add the code to save and restore the callee saved registers.
+ if (!F->hasFnAttribute(Attribute::Naked))
+ insertCSRSpillsAndRestores(Fn);
+
+ // Allow the target machine to make final modifications to the function
+ // before the frame layout is finalized.
+ TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+ // Calculate actual frame offsets for all abstract stack objects...
+ calculateFrameObjectOffsets(Fn);
+
+ // Add prolog and epilog code to the function. This function is required
+ // to align the stack frame as necessary for any stack variables or
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
+ // and MaxCallFrameSize variables.
+ if (!F->hasFnAttribute(Attribute::Naked))
+ insertPrologEpilogCode(Fn);
+
+ // Replace all MO_FrameIndex operands with physical register references
+ // and actual offsets.
+ //
+ replaceFrameIndices(Fn);
+
+ // If register scavenging is needed, as we've enabled doing it as a
+ // post-pass, scavenge the virtual registers that frame index elimination
+ // inserted.
+ if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
+ scavengeFrameVirtualRegs(Fn);
+ // Clear any vregs created by virtual scavenging.
+ // LOCALMOD: made this call conditional, together with scavengeFrameVirtualRegs()
+ Fn.getRegInfo().clearVirtRegs();
+ }
+
+ // Warn on stack size when it exceeds the given limit.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ uint64_t StackSize = MFI->getStackSize();
+ if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
+ DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
+ F->getContext().diagnose(DiagStackSize);
+ }
+
+ delete RS;
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
+ return true;
+}
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void WasmPEI::calculateCallsInformation(MachineFunction &Fn) {
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ unsigned MaxCallFrameSize = 0;
+ bool AdjustsStack = MFI->adjustsStack();
+
+ // Get the function call frame set-up and tear-down instruction opcode
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ // Early exit for targets which have no call frame setup/destroy pseudo
+ // instructions.
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+ return;
+
+ std::vector<MachineBasicBlock::iterator> FrameSDOps;
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+ " instructions should have a single immediate argument!");
+ unsigned Size = I->getOperand(0).getImm();
+ if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+ AdjustsStack = true;
+ FrameSDOps.push_back(I);
+ } else if (I->isInlineAsm()) {
+ // Some inline asm's need a stack frame, as indicated by operand 1.
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ AdjustsStack = true;
+ }
+
+ MFI->setAdjustsStack(AdjustsStack);
+ MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+ for (std::vector<MachineBasicBlock::iterator>::iterator
+ i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+ MachineBasicBlock::iterator I = *i;
+
+ // If call frames are not being included as part of the stack frame, and
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (TFI->canSimplifyCallFramePseudos(Fn))
+ TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+ }
+}
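
As a rough stand-alone illustration of the scan above, the sketch below keeps only the part that computes MaxCallFrameSize and AdjustsStack; the Instr/Opcode types are invented stand-ins for MachineInstr and the target's call-frame pseudo opcodes, not LLVM types.

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Invented, simplified stand-ins for MachineInstr and the two call-frame
// pseudo opcodes.
enum Opcode { FrameSetup, FrameDestroy, Other };
struct Instr {
  Opcode Op;
  uint64_t Imm; // call-frame size carried by a setup/destroy pseudo
};

// Mirrors the loop in calculateCallsInformation(): the largest immediate on
// any setup/destroy pseudo becomes MaxCallFrameSize, and seeing any such
// pseudo at all means the function adjusts the stack.
static std::pair<uint64_t, bool> maxCallFrameSize(const std::vector<Instr> &Body) {
  uint64_t Max = 0;
  bool AdjustsStack = false;
  for (const Instr &I : Body)
    if (I.Op == FrameSetup || I.Op == FrameDestroy) {
      Max = std::max<uint64_t>(Max, I.Imm);
      AdjustsStack = true;
    }
  return {Max, AdjustsStack};
}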
+
+void WasmPEI::assignCalleeSavedSpillSlots(MachineFunction &F,
+ const BitVector &SavedRegs) {
+ // These are used to keep track of the callee-save area. Initialize them.
+ MinCSFrameIndex = INT_MAX;
+ MaxCSFrameIndex = 0;
+
+ if (SavedRegs.empty())
+ return;
+
+ const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+ std::vector<CalleeSavedInfo> CSI;
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned Reg = CSRegs[i];
+ if (SavedRegs.test(Reg))
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
+
+ const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+ // If target doesn't implement this, use generic code.
+
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
+
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+ I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ }
+
+ I->setFrameIdx(FrameIdx);
+ }
+ }
+
+ MFI->setCalleeSavedInfo(CSI);
+}
+
+/// Helper function to update the liveness information for the callee-saved
+/// registers.
+static void updateLiveness(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Visited will contain all the basic blocks that are in the region
+ // where the callee saved registers are alive:
+ // - Anything that is not Save or Restore -> LiveThrough.
+ // - Save -> LiveIn.
+ // - Restore -> LiveOut.
+ // The live-out is not attached to the block, so no need to keep
+ // Restore in this set.
+ SmallPtrSet<MachineBasicBlock *, 8> Visited;
+ SmallVector<MachineBasicBlock *, 8> WorkList;
+ MachineBasicBlock *Entry = &MF.front();
+ MachineBasicBlock *Save = MFI->getSavePoint();
+
+ if (!Save)
+ Save = Entry;
+
+ if (Entry != Save) {
+ WorkList.push_back(Entry);
+ Visited.insert(Entry);
+ }
+ Visited.insert(Save);
+
+ MachineBasicBlock *Restore = MFI->getRestorePoint();
+ if (Restore)
+ // By construction, Restore cannot have been visited already; otherwise
+ // there would be a path to Restore that does not go through Save.
+ WorkList.push_back(Restore);
+
+ while (!WorkList.empty()) {
+ const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+ // By construction, the region that is after the save point is
+ // dominated by the Save and post-dominated by the Restore.
+ if (CurBB == Save && Save != Restore)
+ continue;
+ // Enqueue all the successors not already visited.
+ // Those are by construction either before Save or after Restore.
+ for (MachineBasicBlock *SuccBB : CurBB->successors())
+ if (Visited.insert(SuccBB).second)
+ WorkList.push_back(SuccBB);
+ }
+
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (MachineBasicBlock *MBB : Visited) {
+ MCPhysReg Reg = CSI[i].getReg();
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ if (!MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ }
+ }
+}
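
A stand-alone sketch of the worklist walk above, over a toy CFG given as an adjacency list; the block indices and the Succ table are invented for the example, and the real pass of course works on MachineBasicBlocks. The flood fill starts from the entry and from the restore point and refuses to walk out of the save block, so it collects the blocks in which the callee-saved registers still carry the caller's values and must be marked live-in.

#include <cstddef>
#include <set>
#include <vector>

// Toy CFG: block i's successors are Succ[i]. Mirrors the worklist in
// updateLiveness().
static std::set<size_t> csrLiveBlocks(const std::vector<std::vector<size_t>> &Succ,
                                      size_t Entry, size_t Save, size_t Restore) {
  std::set<size_t> Visited;
  std::vector<size_t> WorkList;
  if (Entry != Save) {
    WorkList.push_back(Entry);
    Visited.insert(Entry);
  }
  Visited.insert(Save);
  WorkList.push_back(Restore);
  while (!WorkList.empty()) {
    size_t Cur = WorkList.back();
    WorkList.pop_back();
    // Don't walk through the save/restore region itself.
    if (Cur == Save && Save != Restore)
      continue;
    for (size_t S : Succ[Cur])
      if (Visited.insert(S).second)
        WorkList.push_back(S);
  }
  return Visited;
}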
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function.
+///
+void WasmPEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+ // Get callee saved register information.
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+ MFI->setCalleeSavedInfoValid(true);
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty())
+ return;
+
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ MachineBasicBlock::iterator I;
+
+ // Spill using target interface.
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ I = SaveBlock->begin();
+ if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ // Insert the spill to the stack frame.
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+ }
+ // Update the live-in information of all the blocks up to the save point.
+ updateLiveness(Fn);
+ }
+
+ // Restore using target interface.
+ for (MachineBasicBlock *MBB : RestoreBlocks) {
+ I = MBB->end();
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = I;
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
+ I = I2;
+
+ bool AtStart = I == MBB->begin();
+ MachineBasicBlock::iterator BeforeI = I;
+ if (!AtStart)
+ --BeforeI;
+
+ // Restore all registers immediately before the return and any
+ // terminators that precede it.
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ assert(I != MBB->begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ if (AtStart)
+ I = MBB->begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+ }
+ }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+ bool StackGrowsDown, int64_t &Offset,
+ unsigned &MaxAlign, unsigned Skew) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ } else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+ MFI->setObjectOffset(FrameIdx, Offset);
+ Offset += MFI->getObjectSize(FrameIdx);
+ }
+}
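
Since the placement arithmetic above is easy to get wrong, here is a minimal, self-contained version of it for a downward-growing stack: roundUpToAlignment reproduces the RoundUpToAlignment(Value, Align, Skew) formula used here, and placeObjectDown mirrors the StackGrowsDown branch. The concrete sizes and alignments in main are invented.

#include <cassert>
#include <cstdint>
#include <iostream>

// Smallest value >= Value that is congruent to Skew modulo Align.
static uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align, uint64_t Skew) {
  assert(Align != 0 && "alignment must be nonzero");
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}

// Place one object on a downward-growing stack. Offset is the running
// distance from the incoming SP; the returned frame offset is negative, so
// the object lives at SP[-Offset] once placed.
static int64_t placeObjectDown(int64_t &Offset, uint64_t Size, uint64_t Align,
                               uint64_t Skew = 0) {
  Offset += Size;                                   // reach the low address
  Offset = roundUpToAlignment(Offset, Align, Skew); // align it
  return -Offset;                                   // computed frame offset
}

int main() {
  int64_t Offset = 0;
  // An 8-byte object aligned to 8, then a 4-byte object aligned to 16.
  std::cout << placeObjectDown(Offset, 8, 8) << '\n';  // prints -8
  std::cout << placeObjectDown(Offset, 4, 16) << '\n'; // prints -16
  assert(Offset == 16);
}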
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+static void
+AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+ SmallSet<int, 16> &ProtectedObjs,
+ MachineFrameInfo *MFI, bool StackGrowsDown,
+ int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
+
+ for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+ E = UnassignedObjs.end(); I != E; ++I) {
+ int i = *I;
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ ProtectedObjs.insert(i);
+ }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void WasmPEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ StackProtector *SP = &getAnalysis<StackProtector>();
+
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+ // Start at the beginning of the local area.
+ // The Offset is the distance from the stack top in the direction
+ // of stack growth -- so it's always nonnegative.
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+ if (StackGrowsDown)
+ LocalAreaOffset = -LocalAreaOffset;
+ assert(LocalAreaOffset >= 0
+ && "Local area offset should be in direction of stack growth");
+ int64_t Offset = LocalAreaOffset;
+
+ // Skew to be applied to alignment.
+ unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+
+ // If there are fixed sized objects that are preallocated in the local area,
+ // non-fixed objects can't be allocated right at the start of local area.
+ // We currently don't support filling in holes in between fixed sized
+ // objects, so we adjust 'Offset' to point to the end of last fixed sized
+ // preallocated object.
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int64_t FixedOff;
+ if (StackGrowsDown) {
+ // The maximum distance from the stack pointer is at the lower address of
+ // the object -- which is given by the offset. For a downward-growing stack,
+ // the offset is negative, so we negate it to get the distance.
+ FixedOff = -MFI->getObjectOffset(i);
+ } else {
+ // The maximum distance from the stack pointer is at the upper
+ // address of the object.
+ FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ }
+ if (FixedOff > Offset) Offset = FixedOff;
+ }
+
+ // First assign frame offsets to stack objects that are used to spill
+ // callee saved registers.
+ if (StackGrowsDown) {
+ for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ // If the stack grows down, we need to add the size to find the lowest
+ // address of the object.
+ Offset += MFI->getObjectSize(i);
+
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ }
+ } else {
+ int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+ for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+ unsigned Align = MFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ MFI->setObjectOffset(i, Offset);
+ Offset += MFI->getObjectSize(i);
+ }
+ }
+
+ unsigned MaxAlign = MFI->getMaxAlignment();
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // incoming stack pointer if a frame pointer is required and is closer
+ // to the incoming rather than the final stack pointer.
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
+ bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
+ TFI.isFPCloseToIncomingSP() &&
+ RegInfo->useFPForScavengingIndex(Fn) &&
+ !RegInfo->needsStackRealignment(Fn));
+ if (RS && EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ // FIXME: Once this is working, the enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> ProtectedObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ StackObjSet LargeArrayObjs;
+ StackObjSet SmallArrayObjs;
+ StackObjSet AddrOfObjs;
+
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ Offset, MaxAlign, Skew);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+
+ switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ case StackProtector::SSPLK_None:
+ continue;
+ case StackProtector::SSPLK_SmallArray:
+ SmallArrayObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_AddrOf:
+ AddrOfObjs.insert(i);
+ continue;
+ case StackProtector::SSPLK_LargeArray:
+ LargeArrayObjs.insert(i);
+ continue;
+ }
+ llvm_unreachable("Unexpected SSPLayoutKind.");
+ }
+
+ AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+ Offset, MaxAlign, Skew);
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && RS->isScavengingFrameIndex((int)i))
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (ProtectedObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ // Make sure the special register scavenging spill slot is closest to the
+ // stack pointer.
+ if (RS && !EarlyScavengingSlots) {
+ SmallVector<int, 2> SFIs;
+ RS->getScavengingFrameIndices(SFIs);
+ for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+ IE = SFIs.end(); I != IE; ++I)
+ AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+ }
+
+ if (!TFI.targetHandlesStackFrameRounding()) {
+ // If we have reserved argument space for call sites in the function
+ // immediately on entry to the current function, count it as part of the
+ // overall stack size.
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI.getStackAlignment();
+ else
+ StackAlign = TFI.getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
+ }
+
+ // Update frame info to pretend that this is part of the stack...
+ int64_t StackSize = Offset - LocalAreaOffset;
+ MFI->setStackSize(StackSize);
+ NumBytesStackSpace += StackSize;
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void WasmPEI::insertPrologEpilogCode(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+
+ // Add prologue to the function...
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.emitPrologue(Fn, *SaveBlock);
+
+ // Add epilogue to restore the callee-save registers in each exiting block.
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+ TFI.emitEpilogue(Fn, *RestoreBlock);
+
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.inlineStackProbe(Fn, *SaveBlock);
+
+ // Emit additional code that is required to support segmented stacks, if
+ // we've been asked for it. This, when linked with a runtime with support
+ // for segmented stacks (libgcc is one), will result in allocating stack
+ // space in small chunks instead of one large contiguous block.
+ if (Fn.shouldSplitStack()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+ }
+
+ // Emit additional code that is required to explicitly handle the stack in
+ // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+ // approach is rather similar to that of Segmented Stacks, but it uses a
+ // different conditional check and another BIF for allocating more stack
+ // space.
+ if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void WasmPEI::replaceFrameIndices(MachineFunction &Fn) {
+ const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(Fn)) return;
+
+ // Store SPAdj at exit of a basic block.
+ SmallVector<int, 8> SPState;
+ SPState.resize(Fn.getNumBlockIDs());
+ SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+
+ // Iterate over the reachable blocks in DFS order.
+ for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+ DFI != DFE; ++DFI) {
+ int SPAdj = 0;
+ // Check the exit state of the DFS stack predecessor.
+ if (DFI.getPathLength() >= 2) {
+ MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+ assert(Reachable.count(StackPred) &&
+ "DFS stack predecessor is already visited.\n");
+ SPAdj = SPState[StackPred->getNumber()];
+ }
+ MachineBasicBlock *BB = *DFI;
+ replaceFrameIndices(BB, Fn, SPAdj);
+ SPState[BB->getNumber()] = SPAdj;
+ }
+
+ // Handle the unreachable blocks.
+ for (auto &BB : Fn) {
+ if (Reachable.count(&BB))
+ // Already handled in DFS traversal.
+ continue;
+ int SPAdj = 0;
+ replaceFrameIndices(&BB, Fn, SPAdj);
+ }
+}
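
A simplified sketch of the SPAdj bookkeeping above: each block inherits the exit SP adjustment of the block it was discovered from on the DFS path, so frame-index offsets inside a call sequence that spans a branch can still be corrected. Block, Delta and EntrySPAdj are invented fields; the real code drives the walk with LLVM's df_ext_begin iterator and calls the per-block replaceFrameIndices at each step.

#include <cstddef>
#include <vector>

// Toy CFG node: Delta is the net SP change made inside the block,
// EntrySPAdj is the adjustment in effect when the block is entered.
struct Block {
  std::vector<size_t> Succs;
  int Delta = 0;
  int EntrySPAdj = 0;
};

// Depth-first walk: every block takes the exit adjustment of the block it
// was reached from, mirroring the "DFS stack predecessor" rule above.
static void propagateSPAdj(std::vector<Block> &Blocks, size_t BB, int SPAdj,
                           std::vector<bool> &Visited) {
  Visited[BB] = true;
  Blocks[BB].EntrySPAdj = SPAdj;
  int Exit = SPAdj + Blocks[BB].Delta; // SP state at the end of this block
  for (size_t S : Blocks[BB].Succs)
    if (!Visited[S])
      propagateSPAdj(Blocks, S, Exit, Visited);
}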
+
+void WasmPEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+ int &SPAdj) {
+ assert(Fn.getSubtarget().getRegisterInfo() &&
+ "getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+ unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+ if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+ bool InsideCallSequence = false;
+
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+ if (I->getOpcode() == FrameSetupOpcode ||
+ I->getOpcode() == FrameDestroyOpcode) {
+ InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+ SPAdj += TII.getSPAdjust(I);
+
+ MachineBasicBlock::iterator PrevI = BB->end();
+ if (I != BB->begin()) PrevI = std::prev(I);
+ TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+ // Visit the instructions created by eliminateCallFramePseudoInstr().
+ if (PrevI == BB->end())
+ I = BB->begin(); // The replaced instr was the first in the block.
+ else
+ I = std::next(PrevI);
+ continue;
+ }
+
+ MachineInstr *MI = I;
+ bool DoIncr = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (!MI->getOperand(i).isFI())
+ continue;
+
+ // Frame indices in debug values are encoded in a target independent
+ // way with simply the frame index and offset rather than any
+ // target-specific addressing mode.
+ if (MI->isDebugValue()) {
+ assert(i == 0 && "Frame indices can only appear as the first "
+ "operand of a DBG_VALUE machine instruction");
+ unsigned Reg;
+ MachineOperand &Offset = MI->getOperand(1);
+ Offset.setImm(Offset.getImm() +
+ TFI->getFrameIndexReference(
+ Fn, MI->getOperand(0).getIndex(), Reg));
+ MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+ continue;
+ }
+
+ // TODO: This code should be commoned with the code for
+ // PATCHPOINT. There's no good reason for the difference in
+ // implementation other than historical accident. The only
+ // remaining difference is the unconditional use of the stack
+ // pointer as the base register.
+ if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
+ assert((!MI->isDebugValue() || i == 0) &&
+ "Frame indices can only appear as the first operand of a "
+ "DBG_VALUE machine instruction");
+ unsigned Reg;
+ MachineOperand &Offset = MI->getOperand(i + 1);
+ const unsigned refOffset =
+ TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
+ Reg);
+
+ Offset.setImm(Offset.getImm() + refOffset);
+ MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
+ continue;
+ }
+
+ // Some instructions (e.g. inline asm instructions) can have
+ // multiple frame indices and/or cause eliminateFrameIndex
+ // to insert more than one instruction. We need the register
+ // scavenger to go through all of these instructions so that
+ // it can update its register information. We keep the
+ // iterator at the point before insertion so that we can
+ // revisit them in full.
+ bool AtBeginning = (I == BB->begin());
+ if (!AtBeginning) --I;
+
+ // If this instruction has a FrameIndex operand, we need to
+ // use that target machine register info object to eliminate
+ // it.
+ TRI.eliminateFrameIndex(MI, SPAdj, i,
+ FrameIndexVirtualScavenging ? nullptr : RS);
+
+ // Reset the iterator if we were at the beginning of the BB.
+ if (AtBeginning) {
+ I = BB->begin();
+ DoIncr = false;
+ }
+
+ MI = nullptr;
+ break;
+ }
+
+ // If we are looking at a call sequence, we need to keep track of
+ // the SP adjustment made by each instruction in the sequence.
+ // This includes both the frame setup/destroy pseudos (handled above),
+ // as well as other instructions that have side effects w.r.t the SP.
+ // Note that this must come after eliminateFrameIndex, because
+ // if I itself referred to a frame index, we shouldn't count its own
+ // adjustment.
+ if (MI && InsideCallSequence)
+ SPAdj += TII.getSPAdjust(MI);
+
+ if (DoIncr && I != BB->end()) ++I;
+
+ // Update register states.
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+ }
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
+void
+WasmPEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+ // Run through the instructions and find any virtual registers.
+ for (MachineFunction::iterator BB = Fn.begin(),
+ E = Fn.end(); BB != E; ++BB) {
+ RS->enterBasicBlock(&*BB);
+
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check BB->end()
+ // directly.
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+ // We might end up here again with a NULL iterator if we scavenged a
+ // register for which we inserted spill code for its definition by what
+ // was originally the first instruction in BB.
+ if (I == MachineBasicBlock::iterator(nullptr))
+ I = BB->begin();
+
+ MachineInstr *MI = I;
+ MachineBasicBlock::iterator J = std::next(I);
+ MachineBasicBlock::iterator P =
+ I == BB->begin() ? MachineBasicBlock::iterator(nullptr)
+ : std::prev(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS->forward(I);
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).isReg()) {
+ MachineOperand &MO = MI->getOperand(i);
+ unsigned Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MI->getOperand(i).isDef() &&
+ "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+ unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert (ScratchReg && "Missing scratch register!");
+ Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS->setRegUsed(ScratchReg);
+ }
+ }
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != std::prev(J)) {
+ BB->splice(J, &*BB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS->getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS->unprocess(P);
+ } else
+ ++I;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
new file mode 100644
index 0000000..4ad6eed
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -0,0 +1,86 @@
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Late peephole optimizations for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-peephole"
+
+namespace {
+class WebAssemblyPeephole final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly late peephole optimizer";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID;
+ WebAssemblyPeephole() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyPeephole::ID = 0;
+FunctionPass *llvm::createWebAssemblyPeephole() {
+ return new WebAssemblyPeephole();
+}
+
+bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case WebAssembly::STORE8_I32:
+ case WebAssembly::STORE16_I32:
+ case WebAssembly::STORE8_I64:
+ case WebAssembly::STORE16_I64:
+ case WebAssembly::STORE32_I64:
+ case WebAssembly::STORE_F32:
+ case WebAssembly::STORE_F64:
+ case WebAssembly::STORE_I32:
+ case WebAssembly::STORE_I64: {
+ // Store instructions return their value operand. If we ended up using
+ // the same register for both, replace it with a dead def so that it
+ // can use $discard instead.
+ MachineOperand &MO = MI.getOperand(0);
+ unsigned OldReg = MO.getReg();
+ // TODO: Handle SP/physregs
+ if (OldReg == MI.getOperand(3).getReg()
+ && TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) {
+ Changed = true;
+ unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ MO.setReg(NewReg);
+ MO.setIsDead();
+ MFI.stackifyVReg(NewReg);
+ MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
+ }
+ }
+ }
+
+ return Changed;
+}
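
A tiny model of the rewrite this pass performs, using invented field names (Result, Value, ResultIsDead) and an invented freshVReg() rather than LLVM's MachineOperand API: when the store's result register is the same as its value register, the result is detached onto a fresh, never-read register and marked dead so the printer can emit $discard.

// Simplified record of a WebAssembly store whose result is written back to
// a register; the fields are invented for this sketch.
struct StoreRecord {
  unsigned Result;          // register receiving the store's result
  unsigned Value;           // register holding the stored value
  bool ResultIsDead = false;
};

static unsigned freshVReg() {
  static unsigned Next = 1000; // hypothetical virtual register numbering
  return Next++;
}

// If the result register aliases the value register, give the result a
// fresh, dead register so it can be printed as $discard.
static bool rewriteStoreResult(StoreRecord &S) {
  if (S.Result != S.Value)
    return false;
  S.Result = freshVReg();
  S.ResultIsDead = true;
  return true;
}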
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
new file mode 100644
index 0000000..9ec6659
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -0,0 +1,175 @@
+//===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a virtual register coloring pass.
+///
+/// WebAssembly doesn't have a fixed number of registers, but it is still
+/// desirable to minimize the total number of registers used in each function.
+///
+/// This code is modeled after lib/CodeGen/StackSlotColoring.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-coloring"
+
+namespace {
+class WebAssemblyRegColoring final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyRegColoring() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "WebAssembly Register Coloring";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyRegColoring::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegColoring() {
+ return new WebAssemblyRegColoring();
+}
+
+// Compute the total spill weight for VReg.
+static float computeWeight(const MachineRegisterInfo *MRI,
+ const MachineBlockFrequencyInfo *MBFI,
+ unsigned VReg) {
+ float weight = 0.0f;
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg))
+ weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
+ MO.getParent());
+ return weight;
+}
+
+bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Register Coloring **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ // If there are calls to setjmp or sigsetjmp, don't perform coloring. Virtual
+ // registers could be modified before the longjmp is executed, resulting in
+ // the wrong value being used afterwards. (See <rdar://problem/8007500>.)
+ // TODO: Does WebAssembly need to care about setjmp for register coloring?
+ if (MF.exposesReturnsTwice())
+ return false;
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ LiveIntervals *Liveness = &getAnalysis<LiveIntervals>();
+ const MachineBlockFrequencyInfo *MBFI =
+ &getAnalysis<MachineBlockFrequencyInfo>();
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+ // Gather all register intervals into a list and sort them.
+ unsigned NumVRegs = MRI->getNumVirtRegs();
+ SmallVector<LiveInterval *, 0> SortedIntervals;
+ SortedIntervals.reserve(NumVRegs);
+
+ DEBUG(dbgs() << "Interesting register intervals:\n");
+ for (unsigned i = 0; i < NumVRegs; ++i) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+ if (MFI.isVRegStackified(VReg))
+ continue;
+ // Skip unused registers, which can use $discard.
+ if (MRI->use_empty(VReg))
+ continue;
+
+ LiveInterval *LI = &Liveness->getInterval(VReg);
+ assert(LI->weight == 0.0f);
+ LI->weight = computeWeight(MRI, MBFI, VReg);
+ DEBUG(LI->dump());
+ SortedIntervals.push_back(LI);
+ }
+ DEBUG(dbgs() << '\n');
+
+ // Sort them to put arguments first (since we don't want to rename live-in
+ // registers), by weight next, and then by position.
+ // TODO: Investigate more intelligent sorting heuristics. For starters, we
+ // should try to coalesce adjacent live intervals before non-adjacent ones.
+ std::sort(SortedIntervals.begin(), SortedIntervals.end(),
+ [MRI](LiveInterval *LHS, LiveInterval *RHS) {
+ if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
+ return MRI->isLiveIn(LHS->reg);
+ if (LHS->weight != RHS->weight)
+ return LHS->weight > RHS->weight;
+ if (LHS->empty() || RHS->empty())
+ return !LHS->empty() && RHS->empty();
+ return *LHS < *RHS;
+ });
+
+ DEBUG(dbgs() << "Coloring register intervals:\n");
+ SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u);
+ SmallVector<SmallVector<LiveInterval *, 4>, 16> Assignments(
+ SortedIntervals.size());
+ BitVector UsedColors(SortedIntervals.size());
+ bool Changed = false;
+ for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+ LiveInterval *LI = SortedIntervals[i];
+ unsigned Old = LI->reg;
+ size_t Color = i;
+ const TargetRegisterClass *RC = MRI->getRegClass(Old);
+
+ // Check if it's possible to reuse any of the used colors.
+ if (!MRI->isLiveIn(Old))
+ for (int C(UsedColors.find_first()); C != -1;
+ C = UsedColors.find_next(C)) {
+ if (MRI->getRegClass(SortedIntervals[C]->reg) != RC)
+ continue;
+ for (LiveInterval *OtherLI : Assignments[C])
+ if (!OtherLI->empty() && OtherLI->overlaps(*LI))
+ goto continue_outer;
+ Color = C;
+ break;
+ continue_outer:;
+ }
+
+ unsigned New = SortedIntervals[Color]->reg;
+ SlotMapping[i] = New;
+ Changed |= Old != New;
+ UsedColors.set(Color);
+ Assignments[Color].push_back(LI);
+ DEBUG(dbgs() << "Assigning vreg"
+ << TargetRegisterInfo::virtReg2Index(LI->reg) << " to vreg"
+ << TargetRegisterInfo::virtReg2Index(New) << "\n");
+ }
+ if (!Changed)
+ return false;
+
+ // Rewrite register operands.
+ for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+ unsigned Old = SortedIntervals[i]->reg;
+ unsigned New = SlotMapping[i];
+ if (Old != New)
+ MRI->replaceRegWith(Old, New);
+ }
+ return true;
+}
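
The coloring loop above is easier to see on plain intervals. The sketch below keeps only the essentials: live ranges are [Start, End) pairs already sorted by the pass's heuristic, and each one takes the first existing color it does not overlap, otherwise a fresh color. It deliberately ignores register classes, live-in arguments and empty ranges, which the real pass also has to respect.

#include <cstddef>
#include <iostream>
#include <vector>

struct Range { unsigned Start, End; }; // half-open live range [Start, End)

static bool overlaps(const Range &A, const Range &B) {
  return A.Start < B.End && B.Start < A.End;
}

// Greedy coloring: reuse the first color whose members don't overlap.
static std::vector<size_t> colorRanges(const std::vector<Range> &Sorted) {
  std::vector<size_t> Color(Sorted.size());
  std::vector<std::vector<size_t>> Assignments; // range indices per color
  for (size_t i = 0; i < Sorted.size(); ++i) {
    size_t C = Assignments.size(); // default: open a fresh color
    for (size_t Candidate = 0; Candidate < Assignments.size(); ++Candidate) {
      bool Clash = false;
      for (size_t Other : Assignments[Candidate])
        if (overlaps(Sorted[i], Sorted[Other])) { Clash = true; break; }
      if (!Clash) { C = Candidate; break; }
    }
    if (C == Assignments.size())
      Assignments.emplace_back();
    Assignments[C].push_back(i);
    Color[i] = C;
  }
  return Color;
}

int main() {
  // Two non-overlapping ranges share a register; the third overlaps both.
  std::vector<Range> Ranges = {{0, 4}, {4, 8}, {2, 6}};
  for (size_t C : colorRanges(Ranges))
    std::cout << C << ' '; // prints: 0 0 1
  std::cout << '\n';
}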
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
new file mode 100644
index 0000000..f621db0
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -0,0 +1,109 @@
+//===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a pass which assigns WebAssembly register
+/// numbers for CodeGen virtual registers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-numbering"
+
+namespace {
+class WebAssemblyRegNumbering final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly Register Numbering";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyRegNumbering() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegNumbering::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegNumbering() {
+ return new WebAssemblyRegNumbering();
+}
+
+bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Register Numbering **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo &FrameInfo = *MF.getFrameInfo();
+
+ MFI.initWARegs();
+
+ // WebAssembly argument registers are in the same index space as local
+ // variables. Assign the numbers for them first.
+ MachineBasicBlock &EntryMBB = MF.front();
+ for (MachineInstr &MI : EntryMBB) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm());
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Then assign regular WebAssembly registers for all remaining used
+ // virtual registers. TODO: Consider sorting the registers by frequency of
+ // use, to maximize usage of small immediate fields.
+ unsigned NumArgRegs = MFI.getParams().size();
+ unsigned NumVRegs = MF.getRegInfo().getNumVirtRegs();
+ unsigned NumStackRegs = 0;
+ unsigned CurReg = 0;
+ for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx);
+ // Handle stackified registers.
+ if (MFI.isVRegStackified(VReg)) {
+ MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++);
+ continue;
+ }
+ // Skip unused registers.
+ if (MRI.use_empty(VReg))
+ continue;
+ if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg)
+ MFI.setWAReg(VReg, NumArgRegs + CurReg++);
+ }
+ // Allocate locals for used physical registers
+ if (FrameInfo.getStackSize() > 0)
+ MFI.addPReg(WebAssembly::SP32, CurReg++);
+
+ return true;
+}
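
A small worked example of the numbering scheme, with invented register names and roles: argument registers keep the local indices 0..NumArgs-1, stackified values get no local at all (the pass tags them with INT32_MIN | n instead), unused registers are skipped, and everything else is numbered densely after the arguments.

#include <iostream>
#include <vector>

int main() {
  const unsigned NumArgs = 2; // locals 0 and 1 belong to the arguments
  struct VReg { const char *Name; bool Stackified; bool Used; };
  std::vector<VReg> VRegs = {{"%a", false, true},
                             {"%b", true, true},
                             {"%c", false, false},
                             {"%d", false, true}};

  unsigned CurReg = 0;
  for (const VReg &R : VRegs) {
    if (R.Stackified)
      std::cout << R.Name << ": stackified, no local\n";
    else if (!R.Used)
      std::cout << R.Name << ": unused\n";
    else
      std::cout << R.Name << ": local " << NumArgs + CurReg++ << '\n';
  }
  // prints: %a gets local 2, %b is stackified, %c is unused, %d gets local 3
}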
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
new file mode 100644
index 0000000..89ef5cd
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -0,0 +1,265 @@
+//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a register stacking pass.
+///
+/// This pass reorders instructions to put register uses and defs in an order
+/// such that they form single-use expression trees. Registers fitting this form
+/// are then marked as "stackified", meaning references to them are replaced by
+/// "push" and "pop" from the stack.
+///
+/// This is primarily a code size optimization, since temporary values on the
+/// expression stack don't need to be named.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-stackify"
+
+namespace {
+class WebAssemblyRegStackify final : public MachineFunctionPass {
+ const char *getPassName() const override {
+ return "WebAssembly Register Stackify";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(LiveVariablesID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegStackify() {
+ return new WebAssemblyRegStackify();
+}
+
+// Decorate the given instruction with implicit operands that enforce the
+// expression stack ordering constraints for an instruction which is on
+// the expression stack.
+static void ImposeStackOrdering(MachineInstr *MI) {
+ // Write the opaque EXPR_STACK register.
+ if (!MI->definesRegister(WebAssembly::EXPR_STACK))
+ MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+ /*isDef=*/true,
+ /*isImp=*/true));
+
+ // Also read the opaque EXPR_STACK register.
+ if (!MI->readsRegister(WebAssembly::EXPR_STACK))
+ MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+ /*isDef=*/false,
+ /*isImp=*/true));
+}
+
+// Test whether it's safe to move Def to just before Insert.
+// TODO: Compute memory dependencies in a way that doesn't require always
+// walking the block.
+// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
+// more precise.
+static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+ AliasAnalysis &AA, LiveIntervals &LIS,
+ MachineRegisterInfo &MRI) {
+ assert(Def->getParent() == Insert->getParent());
+ bool SawStore = false, SawSideEffects = false;
+ MachineBasicBlock::const_iterator D(Def), I(Insert);
+
+ // Check for register dependencies.
+ for (const MachineOperand &MO : Def->operands()) {
+ if (!MO.isReg() || MO.isUndef())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ // If the register is dead here and at Insert, ignore it.
+ if (MO.isDead() && Insert->definesRegister(Reg) &&
+ !Insert->readsRegister(Reg))
+ continue;
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // If the physical register is never modified, ignore it.
+ if (!MRI.isPhysRegModified(Reg))
+ continue;
+ // Otherwise, it's a physical register with unknown liveness.
+ return false;
+ }
+
+ // Ask LiveIntervals whether moving this virtual register use or def to
+ // Insert will change which value numbers are seen.
+ const LiveInterval &LI = LIS.getInterval(Reg);
+ VNInfo *DefVNI = MO.isDef() ?
+ LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) :
+ LI.getVNInfoBefore(LIS.getInstructionIndex(Def));
+ assert(DefVNI && "Instruction input missing value number");
+ VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert));
+ if (InsVNI && DefVNI != InsVNI)
+ return false;
+ }
+
+ // Check for memory dependencies and side effects.
+ for (--I; I != D; --I)
+ SawSideEffects |= !I->isSafeToMove(&AA, SawStore);
+ return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
+ !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+}
+
+bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Register Stackifying **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+ // Walk the instructions from the bottom up. Currently we don't look past
+ // block boundaries, and the blocks aren't ordered so the block visitation
+ // order isn't significant, but we may want to change this in the future.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : reverse(MBB)) {
+ MachineInstr *Insert = &MI;
+ // Don't nest anything inside a phi.
+ if (Insert->getOpcode() == TargetOpcode::PHI)
+ break;
+
+ // Don't nest anything inside an inline asm, because we don't have
+ // constraints for $push inputs.
+ if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+ break;
+
+ // Iterate through the inputs in reverse order, since we'll be pulling
+ // operands off the stack in LIFO order.
+ bool AnyStackified = false;
+ for (MachineOperand &Op : reverse(Insert->uses())) {
+ // We're only interested in explicit virtual register operands.
+ if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
+ continue;
+
+ unsigned Reg = Op.getReg();
+
+ // Only consider registers with a single definition.
+ // TODO: Eventually we may relax this, to stackify phi transfers.
+ MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+ if (!Def)
+ continue;
+
+ // There's no use in nesting implicit defs inside anything.
+ if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ continue;
+
+ // Don't nest an INLINE_ASM def into anything, because we don't have
+ // constraints for $pop outputs.
+ if (Def->getOpcode() == TargetOpcode::INLINEASM)
+ continue;
+
+ // Don't nest PHIs inside of anything.
+ if (Def->getOpcode() == TargetOpcode::PHI)
+ continue;
+
+ // Argument instructions represent live-in registers and not real
+ // instructions.
+ if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
+ Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
+ Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
+ Def->getOpcode() == WebAssembly::ARGUMENT_F64)
+ continue;
+
+ // Single-use expression trees require defs that have one use.
+ // TODO: Eventually we'll relax this, to take advantage of set_local
+ // returning its result.
+ if (!MRI.hasOneUse(Reg))
+ continue;
+
+ // For now, be conservative and don't look across block boundaries.
+ // TODO: Be more aggressive?
+ if (Def->getParent() != &MBB)
+ continue;
+
+ // Don't move instructions that have side effects or memory dependencies
+ // or other complications.
+ if (!IsSafeToMove(Def, Insert, AA, LIS, MRI))
+ continue;
+
+ Changed = true;
+ AnyStackified = true;
+ // Move the def down and nest it in the current instruction.
+ MBB.splice(Insert, &MBB, Def);
+ LIS.handleMove(Def);
+ MFI.stackifyVReg(Reg);
+ ImposeStackOrdering(Def);
+ Insert = Def;
+ }
+ if (AnyStackified)
+ ImposeStackOrdering(&MI);
+ }
+ }
+
+ // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere
+ // so that it never looks like a use-before-def.
+ if (Changed) {
+ MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
+ for (MachineBasicBlock &MBB : MF)
+ MBB.addLiveIn(WebAssembly::EXPR_STACK);
+ }
+
+#ifndef NDEBUG
+ // Verify that pushes and pops are performed in LIFO order.
+ SmallVector<unsigned, 0> Stack;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ for (MachineOperand &MO : reverse(MI.explicit_operands())) {
+ if (!MO.isReg())
+ continue;
+ unsigned VReg = MO.getReg();
+
+ // Don't stackify physregs like SP or FP.
+ if (!TargetRegisterInfo::isVirtualRegister(VReg))
+ continue;
+
+ if (MFI.isVRegStackified(VReg)) {
+ if (MO.isDef())
+ Stack.push_back(VReg);
+ else
+ assert(Stack.pop_back_val() == VReg);
+ }
+ }
+ }
+ // TODO: Generalize this code to support keeping values on the stack across
+ // basic block boundaries.
+ assert(Stack.empty());
+ }
+#endif
+
+ return Changed;
+}
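
The NDEBUG check at the end of the function is worth spelling out on its own, since it is the clearest statement of the invariant the pass establishes. The sketch below re-implements it over invented Operand records and an explicit set of stackified registers: scanning each instruction's explicit operands in reverse, every stackified def is a push and every stackified use must pop the most recently pushed register.

#include <cassert>
#include <set>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; };

// Returns true if the stackified defs and uses obey LIFO discipline.
static bool checkLIFO(const std::vector<std::vector<Operand>> &Insts,
                      const std::set<unsigned> &Stackified) {
  std::vector<unsigned> Stack;
  for (const auto &Ops : Insts)
    for (auto It = Ops.rbegin(); It != Ops.rend(); ++It) {
      if (!Stackified.count(It->Reg))
        continue;
      if (It->IsDef)
        Stack.push_back(It->Reg);
      else if (Stack.empty() || Stack.back() != It->Reg)
        return false;
      else
        Stack.pop_back();
    }
  return Stack.empty();
}

int main() {
  // %1 = mul %10, %11 ; %2 = add %1, %12 -- only %1 is stackified, so the
  // def pushed by the mul is popped by the add's use, in LIFO order.
  std::vector<std::vector<Operand>> Insts = {
      {{1, true}, {10, false}, {11, false}},
      {{2, true}, {1, false}, {12, false}}};
  assert(checkLIFO(Insts, {1}));
}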
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 385c40b..dcada45 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -43,7 +43,7 @@ WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
}
BitVector
-WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const {
BitVector Reserved(getNumRegs());
for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32,
WebAssembly::FP64})
@@ -52,9 +52,37 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
void WebAssemblyRegisterInfo::eliminateFrameIndex(
- MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME
+ MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum, RegScavenger * /*RS*/) const {
+ assert(SPAdj == 0);
+ MachineInstr &MI = *II;
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ const MachineFrameInfo& MFI = *MF.getFrameInfo();
+ int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+
+ if (MI.mayLoadOrStore()) {
+ // If this is a load or store, make it relative to SP and fold the frame
+ // offset directly in
+ assert(MI.getOperand(1).getImm() == 0 &&
+ "Can't eliminate FI yet if offset is already set");
+ MI.getOperand(1).setImm(FrameOffset);
+ MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+ } else {
+ // Otherwise create an i32.add SP, offset and make it the operand
+ auto &MRI = MF.getRegInfo();
+ const auto *TII = MF.getSubtarget().getInstrInfo();
+
+ unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetReg)
+ .addImm(FrameOffset);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), OffsetReg)
+ .addReg(WebAssembly::SP32)
+ .addReg(OffsetReg);
+ MI.getOperand(FIOperandNum).ChangeToRegister(OffsetReg, /*IsDef=*/false);
+ }
}
unsigned
@@ -67,21 +95,11 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return Regs[TFI->hasFP(MF)][TT.isArch64Bit()];
}
-bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- return !MF.getFunction()->hasFnAttribute("no-realign-stack");
-}
-
-// FIXME: share this with other backends with identical implementation?
-bool WebAssemblyRegisterInfo::needsStackRealignment(
- const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const WebAssemblyFrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment =
- ((MFI->getMaxAlignment() > StackAlign) ||
- F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::StackAlignment));
-
- return requiresRealignment && canRealignStack(MF);
+const TargetRegisterClass *
+WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ assert(Kind == 0 && "Only one kind of pointer on WebAssembly");
+ if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
+ return &WebAssembly::I64RegClass;
+ return &WebAssembly::I32RegClass;
}
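
A worked example of the offset arithmetic in the new eliminateFrameIndex above, under my reading of the frame layout (objects placed at negative offsets from the incoming SP, with the prologue moving SP down by the frame size): the immediate folded into the access is StackSize + ObjectOffset, i.e. the object's distance above the adjusted SP. The concrete numbers are invented.

#include <cassert>
#include <cstdint>

int main() {
  const int64_t StackSize = 32;    // whole frame, already rounded up
  const int64_t ObjectOffset = -8; // object sits 8 bytes below the incoming SP
  // eliminateFrameIndex folds StackSize + ObjectOffset into the access, so
  // the object is addressed as SP32 + 24 once the prologue has dropped SP.
  const int64_t FrameOffset = StackSize + ObjectOffset;
  assert(FrameOffset == 24);
  return 0;
}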
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index dbdb9d0..ad1d71e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -42,9 +42,9 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
- // Base pointer (stack realignment) support.
- bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 2ba42eb..80a83fa 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -33,22 +33,26 @@ def FP64 : WebAssemblyReg<"%FP64">;
def SP32 : WebAssemblyReg<"%SP32">;
def SP64 : WebAssemblyReg<"%SP64">;
-// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do
-// away with it? Try deleting once the backend works.
-// WebAssembly uses virtual registers, but the backend defines a few physical
-// registers here to keep SDAG and the MachineInstr layers happy.
-foreach i = 0-4 in {
- def I#i : WebAssemblyReg<"%i."#i>; // i32
- def L#i : WebAssemblyReg<"%l."#i>; // i64
- def F#i : WebAssemblyReg<"%f."#i>; // f32
- def D#i : WebAssemblyReg<"%d."#i>; // f64
-}
+// The register allocation framework requires register classes have at least
+// one register, so we define a few for the floating point register classes
+// since we otherwise don't need a physical register in those classes.
+def F32_0 : WebAssemblyReg<"%f32.0">;
+def F64_0 : WebAssemblyReg<"%f64.0">;
+
+// The expression stack "register". This is an opaque entity which serves to
+// order uses and defs that must remain in LIFO order.
+def EXPR_STACK : WebAssemblyReg<"STACK">;
+
+// The incoming arguments "register". This is an opaque entity which serves to
+// order the ARGUMENT instructions that are emulating live-in registers and
+// must not be scheduled below other instructions.
+def ARGUMENTS : WebAssemblyReg<"ARGUMENTS">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
-def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>;
-def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>;
-def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
-def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>;
+def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>;
+def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>;
+def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
+def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
new file mode 100644
index 0000000..4e08b2b
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -0,0 +1,124 @@
+//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an optimization pass using store result values.
+///
+/// WebAssembly's store instructions return the stored value. This is to enable
+/// an optimization wherein uses of the stored value can be replaced by uses of
+/// the store's result value, making the stored value register more likely to
+/// be single-use, thus more likely to be useful to register stackifying, and
+/// potentially also exposing the store to register stackifying. These both can
+/// reduce get_local/set_local traffic.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-store-results"
+
+namespace {
+class WebAssemblyStoreResults final : public MachineFunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override {
+ return "WebAssembly Store Results";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyStoreResults::ID = 0;
+FunctionPass *llvm::createWebAssemblyStoreResults() {
+ return new WebAssemblyStoreResults();
+}
+
+bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG({
+ dbgs() << "********** Store Results **********\n"
+ << "********** Function: " << MF.getName() << '\n';
+ });
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ bool Changed = false;
+
+ assert(MRI.isSSA() && "StoreResults depends on SSA form");
+
+ for (auto &MBB : MF) {
+ DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
+ for (auto &MI : MBB)
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case WebAssembly::STORE8_I32:
+ case WebAssembly::STORE16_I32:
+ case WebAssembly::STORE8_I64:
+ case WebAssembly::STORE16_I64:
+ case WebAssembly::STORE32_I64:
+ case WebAssembly::STORE_F32:
+ case WebAssembly::STORE_F64:
+ case WebAssembly::STORE_I32:
+ case WebAssembly::STORE_I64:
+ unsigned ToReg = MI.getOperand(0).getReg();
+ unsigned FromReg = MI.getOperand(3).getReg();
+ for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+ MachineOperand &O = *I++;
+ MachineInstr *Where = O.getParent();
+ if (Where->getOpcode() == TargetOpcode::PHI) {
+ // PHIs use their operands on their incoming CFG edges rather than
+ // in their parent blocks. Get the basic block paired with this use
+ // of FromReg and check that MI's block dominates it.
+ MachineBasicBlock *Pred =
+ Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
+ if (!MDT.dominates(&MBB, Pred))
+ continue;
+ } else {
+ // For a non-PHI, check that MI dominates the instruction in the
+ // normal way.
+ if (&MI == Where || !MDT.dominates(&MI, Where))
+ continue;
+ }
+ Changed = true;
+ DEBUG(dbgs() << "Setting operand " << O << " in " << *Where
+ << " from " << MI << "\n");
+ O.setReg(ToReg);
+ // If the store's def was previously dead, it is no longer. But the
+ // dead flag shouldn't be set yet.
+ assert(!MI.getOperand(0).isDead() && "Dead flag set on store result");
+ }
+ }
+ }
+
+ return Changed;
+}
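
The pass added above forwards uses of a stored value to the store's own result value. A minimal standalone C++ analogue of the source-level effect, not the pass itself (withoutForwarding, withForwarding, and the global g are purely illustrative; the real pass rewrites machine operands under the dominance checks shown in runOnMachineFunction):

#include <cstdio>

static int g;

// Without forwarding: 'v' must stay live past the store because the caller
// keeps reading the original value afterwards.
static int withoutForwarding(int v) {
  g = v;
  return v + 1;
}

// With forwarding: the assignment expression yields the stored value, which
// plays the role of the store's result register; the original 'v' becomes
// single-use, which is what makes register stackifying more likely to apply.
static int withForwarding(int v) {
  int stored = (g = v);
  return stored + 1;
}

int main() {
  std::printf("%d %d\n", withoutForwarding(41), withForwarding(41)); // 42 42
  return 0;
}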
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 3d9e7aa..cb2d5a6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -46,3 +46,4 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
TLInfo(TM, *this) {}
bool WebAssemblySubtarget::enableMachineScheduler() const { return true; }
+bool WebAssemblySubtarget::useAA() const { return true; }
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 6f17619..f530a29 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -61,9 +61,15 @@ public:
const WebAssemblyTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+ const WebAssemblyInstrInfo *getInstrInfo() const override {
+ return &InstrInfo;
+ }
+ const WebAssemblyRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override;
- bool useAA() const override { return true; }
+ bool useAA() const override;
// Predicates used by WebAssemblyInstrInfo.td.
bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 6f93248..e31ea46 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,11 +45,16 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT.isArch64Bit()
- ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128"
- : "e-p:32:32-i64:64-v128:8:128-n32:64-S128",
+ : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128"
+ : "e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+ // WebAssembly type-checks expressions, but a noreturn function with a return
+ // type that doesn't match the context will cause a check failure. So we lower
+ // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
+ // 'unreachable' expression which is meant for that case.
+ this->Options.TrapUnreachable = true;
+
initAsmInfo();
// We need a reducible CFG, so disable some optimizations which tend to
@@ -77,7 +82,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+ I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
}
return I.get();
}
@@ -94,23 +99,18 @@ public:
}
FunctionPass *createTargetRegisterAllocator(bool) override;
- void addFastRegAlloc(FunctionPass *RegAllocPass) override;
- void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
void addIRPasses() override;
- bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
void addPreRegAlloc() override;
- void addRegAllocPasses(bool Optimized);
void addPostRegAlloc() override;
- void addPreSched2() override;
void addPreEmitPass() override;
};
} // end anonymous namespace
TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
});
}
@@ -124,50 +124,86 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
-void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
- assert(!RegAllocPass && "WebAssembly uses no regalloc!");
- addRegAllocPasses(false);
-}
-
-void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
- assert(!RegAllocPass && "WebAssembly uses no regalloc!");
- addRegAllocPasses(true);
-}
-
//===----------------------------------------------------------------------===//
// The following functions are called from lib/CodeGen/Passes.cpp to modify
// the CodeGen pass sequence.
//===----------------------------------------------------------------------===//
void WebAssemblyPassConfig::addIRPasses() {
- // FIXME: the default for this option is currently POSIX, whereas
- // WebAssembly's MVP should default to Single.
if (TM->Options.ThreadModel == ThreadModel::Single)
+ // In "single" mode, atomics get lowered to non-atomics.
addPass(createLowerAtomicPass());
else
// Expand some atomic operations. WebAssemblyTargetLowering has hooks which
// control specifically what gets lowered.
addPass(createAtomicExpandPass(TM));
+ // Optimize "returned" function attributes.
+ addPass(createWebAssemblyOptimizeReturned());
+
TargetPassConfig::addIRPasses();
}
-bool WebAssemblyPassConfig::addPreISel() { return false; }
-
bool WebAssemblyPassConfig::addInstSelector() {
+ (void)TargetPassConfig::addInstSelector();
addPass(
createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
+ // Run the argument-move pass immediately after the ScheduleDAG scheduler
+ // so that we can fix up the ARGUMENT instructions before anything else
+ // sees them in the wrong place.
+ addPass(createWebAssemblyArgumentMove());
return false;
}
-bool WebAssemblyPassConfig::addILPOpts() { return true; }
+bool WebAssemblyPassConfig::addILPOpts() {
+ (void)TargetPassConfig::addILPOpts();
+ return true;
+}
+
+void WebAssemblyPassConfig::addPreRegAlloc() {
+ TargetPassConfig::addPreRegAlloc();
-void WebAssemblyPassConfig::addPreRegAlloc() {}
+ // Prepare store instructions for register stackifying.
+ addPass(createWebAssemblyStoreResults());
+}
-void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {}
+void WebAssemblyPassConfig::addPostRegAlloc() {
+ // TODO: The following CodeGen passes don't currently support code containing
+ // virtual registers. Consider removing their restrictions and re-enabling
+ // them.
+ //
+ // We use our own PrologEpilogInserter which is very slightly modified to
+ // tolerate virtual registers.
+ disablePass(&PrologEpilogCodeInserterID);
+ // Fails with: should be run after register allocation.
+ disablePass(&MachineCopyPropagationID);
+
+ // Mark registers as representing wasm's expression stack.
+ addPass(createWebAssemblyRegStackify());
+
+ // Run the register coloring pass to reduce the total number of registers.
+ addPass(createWebAssemblyRegColoring());
+
+ TargetPassConfig::addPostRegAlloc();
+
+ // Run WebAssembly's version of the PrologEpilogInserter. Target-independent
+ // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run
+ // PEI before ShrinkWrap but otherwise in the same position in the order.
+ addPass(createWebAssemblyPEI());
+}
-void WebAssemblyPassConfig::addPostRegAlloc() {}
+void WebAssemblyPassConfig::addPreEmitPass() {
+ TargetPassConfig::addPreEmitPass();
-void WebAssemblyPassConfig::addPreSched2() {}
+ // Put the CFG in structured form; insert BLOCK and LOOP markers.
+ addPass(createWebAssemblyCFGStackify());
-void WebAssemblyPassConfig::addPreEmitPass() {}
+ // Lower br_unless into br_if.
+ addPass(createWebAssemblyLowerBrUnless());
+
+ // Create a mapping from LLVM CodeGen virtual registers to wasm registers.
+ addPass(createWebAssemblyRegNumbering());
+
+ // Perform the very last peephole optimizations on the code.
+ addPass(createWebAssemblyPeephole());
+}
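
The pass ordering configured above is easy to lose in diff form. A standalone C++ summary of that order, using only the factory names that appear in this file (the vectors and printout are illustrative, not LLVM's TargetPassConfig API):

#include <cstdio>
#include <vector>

int main() {
  // Post-"register allocation" order: stackify before coloring, then the
  // virtual-register-tolerant PEI replacement.
  const std::vector<const char *> PostRegAlloc = {
      "WebAssemblyRegStackify", "WebAssemblyRegColoring", "WebAssemblyPEI"};
  // Pre-emit order: structure the CFG first, then lower br_unless, assign
  // wasm register numbers, and finish with peepholes.
  const std::vector<const char *> PreEmit = {
      "WebAssemblyCFGStackify", "WebAssemblyLowerBrUnless",
      "WebAssemblyRegNumbering", "WebAssemblyPeephole"};
  for (const char *P : PostRegAlloc)
    std::printf("post-RA pass: %s\n", P);
  for (const char *P : PreEmit)
    std::printf("pre-emit pass: %s\n", P);
  return 0;
}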
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
new file mode 100644
index 0000000..74e33b9
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the functions of the WebAssembly-specific subclass
+/// of TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetMachine.h"
+using namespace llvm;
+
+void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index ee78b94..39e50c9 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -16,50 +16,13 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
namespace llvm {
-class GlobalVariable;
-
-class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile {
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileELF {
public:
- WebAssemblyTargetObjectFile() {
- TextSection = nullptr;
- DataSection = nullptr;
- BSSSection = nullptr;
- ReadOnlySection = nullptr;
-
- StaticCtorSection = nullptr;
- StaticDtorSection = nullptr;
- LSDASection = nullptr;
- EHFrameSection = nullptr;
- DwarfAbbrevSection = nullptr;
- DwarfInfoSection = nullptr;
- DwarfLineSection = nullptr;
- DwarfFrameSection = nullptr;
- DwarfPubTypesSection = nullptr;
- DwarfDebugInlineSection = nullptr;
- DwarfStrSection = nullptr;
- DwarfLocSection = nullptr;
- DwarfARangesSection = nullptr;
- DwarfRangesSection = nullptr;
- }
-
- MCSection *getSectionForConstant(SectionKind Kind,
- const Constant *C) const override {
- return ReadOnlySection;
- }
-
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const override {
- return DataSection;
- }
-
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const override;
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index fa88ed5..3566317 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -21,8 +21,7 @@ using namespace llvm;
#define DEBUG_TYPE "wasmtti"
TargetTransformInfo::PopcntSupportKind
-WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) {
+WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- // TODO: Make Math.popcount32 happen in WebAssembly.
- return TTI::PSK_Software;
+ return TargetTransformInfo::PSK_FastHardware;
}
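
The TTI change above advertises population count as fast hardware rather than a software fallback. A standalone C++ reminder of the operation being costed (popcount32 is an illustrative helper, not an LLVM interface):

#include <bitset>
#include <cstdint>
#include <cstdio>

// Counts the set bits of a 32-bit value; this is the operation whose cost
// model the hunk above switches to PSK_FastHardware.
static unsigned popcount32(std::uint32_t X) {
  return static_cast<unsigned>(std::bitset<32>(X).count());
}

int main() {
  std::printf("popcount(0xF0F0F0F0) = %u\n", popcount32(0xF0F0F0F0u)); // 16
  return 0;
}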
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 7ffb604..26dc388 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -38,7 +38,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
const WebAssemblyTargetLowering *getTLI() const { return TLI; }
public:
- WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F)
+ WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -54,7 +54,7 @@ public:
// TODO: Implement more Scalar TTI for WebAssembly
- TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
/// @}
diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
new file mode 100644
index 0000000..ee9d060
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -0,0 +1,311 @@
+# Tests which are known to fail from the GCC torture test suite.
+
+# Core dump.
+920908-1.c
+pr38151.c
+va-arg-22.c
+
+# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
+struct-ret-1.c
+va-arg-11.c
+va-arg-21.c
+va-arg-24.c
+va-arg-trap-1.c
+
+# WebAssemblyCFGStackify.cpp:211: void SortBlocks(llvm::MachineFunction&, const llvm::MachineLoopInfo&): Assertion `L->contains( MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && "Loop isn't contiguous"' failed.
+20000815-1.c
+20010129-1.c
+930628-1.c
+980707-1.c
+
+# WebAssemblyISelLowering.cpp:316: virtual llvm::SDValue llvm::WebAssemblyTargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const: Assertion `!Out.Flags.isByVal() && "byval is not valid for return values"' failed.
+20030914-2.c
+20040703-1.c
+20081117-1.c
+920625-1.c
+931004-11.c
+931004-13.c
+980223.c
+bitfld-5.c
+complex-7.c
+pr38969.c
+pr51323.c
+pr52129.c
+pr57130.c
+
+# These were previously "Cannot select FrameIndex." Now most of them fail
+# because they contain call frame pseudos (e.g. call a vararg func),
+# frame pointers, or similar. This list will be updated again soon.
+20000519-1.c
+20000706-4.c
+20000706-5.c
+20000801-2.c
+20000801-4.c
+20011126-2.c
+
+20020529-1.c
+20021024-1.c
+
+20030828-1.c
+20030914-1.c
+
+20040302-1.c
+20040625-1.c
+20040823-1.c
+
+20041113-1.c
+
+20041214-1.c
+
+20050826-2.c
+
+20071213-1.c
+
+20080506-2.c
+20080519-1.c
+
+20081103-1.c
+20090113-1.c
+20090113-2.c
+20090113-3.c
+
+20090623-1.c
+
+920501-6.c
+920501-8.c
+920726-1.c
+930518-1.c
+
+931004-10.c
+931004-12.c
+931004-14.c
+931004-2.c
+931004-4.c
+931004-6.c
+931004-8.c
+
+980205.c
+980608-1.c
+980709-1.c
+980716-1.c
+990127-1.c
+
+991216-2.c
+
+#cbrt.c
+complex-5.c
+complex-6.c
+
+enum-3.c
+fprintf-chk-1.c
+frame-address.c
+loop-15.c
+loop-ivopts-2.c
+mayalias-3.c
+
+multi-ix.c
+
+pr20466-1.c
+
+
+pr28778.c
+pr28982b.c
+
+pr30778.c
+pr31448-2.c
+pr31448.c
+
+pr33870-1.c
+pr33870.c
+
+pr38051.c
+
+pr39100.c
+
+pr39339.c
+pr40022.c
+pr40657.c
+
+pr43987.c
+
+pr44575.c
+
+pr44942.c
+pr46309.c
+pr47538.c
+pr47925.c
+
+pr49390.c
+pr49419.c
+
+#pr51877.c
+
+#pr52979-1.c
+#pr52979-2.c
+pr53645-2.c
+pr53645.c
+
+pr56205.c
+
+pr56866.c
+
+pr57876.c
+pr58277-1.c
+
+pr59643.c
+
+printf-chk-1.c
+pta-field-1.c
+pta-field-2.c
+
+stdarg-1.c
+stdarg-2.c
+stdarg-3.c
+stdarg-4.c
+strct-stdarg-1.c
+strct-varg-1.c
+
+va-arg-1.c
+va-arg-10.c
+va-arg-12.c
+va-arg-13.c
+va-arg-14.c
+va-arg-15.c
+va-arg-16.c
+va-arg-17.c
+va-arg-18.c
+va-arg-19.c
+va-arg-2.c
+va-arg-20.c
+va-arg-23.c
+va-arg-26.c
+va-arg-4.c
+va-arg-5.c
+va-arg-6.c
+va-arg-7.c
+va-arg-8.c
+va-arg-9.c
+va-arg-pack-1.c
+vfprintf-1.c
+vfprintf-chk-1.c
+vprintf-1.c
+vprintf-chk-1.c
+
+# Cannot select callseq_end.
+20040811-1.c
+pr43220.c
+vla-dealloc-1.c
+
+# Cannot select brind.
+20071210-1.c
+920501-4.c
+920501-5.c
+
+# Cannot select BlockAddress.
+comp-goto-1.c
+980526-1.c
+990208-1.c
+
+# WebAssembly hasn't implemented byval arguments.
+20000412-3.c
+20000419-1.c
+20000706-1.c
+20000706-2.c
+20000707-1.c
+20000717-1.c
+20000717-5.c
+20000808-1.c
+20010605-2.c
+20011113-1.c
+20020215-1.c
+20020810-1.c
+20021118-1.c
+20040707-1.c
+20040709-1.c
+20040709-2.c
+20041201-1.c
+20050713-1.c
+20070614-1.c
+920908-2.c
+921112-1.c
+921117-1.c
+921123-2.c
+921204-1.c
+930126-1.c
+930208-1.c
+931004-5.c
+931004-9.c
+931031-1.c
+950607-2.c
+960416-1.c
+990525-1.c
+991118-1.c
+bf64-1.c
+complex-1.c
+complex-2.c
+pr15262-2.c
+pr20621-1.c
+pr23135.c
+pr30185.c
+pr42248.c
+
+# unimplemented operation lowering.
+20010122-1.c
+20030323-1.c
+20030811-1.c
+pr17377.c
+
+# Error: invalid output constraint '=t' in asm.
+990413-2.c
+990826-0.c
+
+# Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target.
+built-in-setjmp.c
+pr60003.c
+
+# Error in the program / unsupported by Clang.
+scal-to-vec1.c
+scal-to-vec2.c
+scal-to-vec3.c
+20000822-1.c
+20010209-1.c
+20010605-1.c
+20030501-1.c
+20040520-1.c
+20061220-1.c
+20090219-1.c
+920415-1.c
+920428-2.c
+920501-7.c
+920612-2.c
+920721-4.c
+921017-1.c
+921215-1.c
+931002-1.c
+comp-goto-2.c
+nest-align-1.c
+nest-stdar-1.c
+nestfunc-1.c
+nestfunc-2.c
+nestfunc-3.c
+nestfunc-5.c
+nestfunc-6.c
+nestfunc-7.c
+pr22061-3.c
+pr22061-4.c
+pr24135.c
+pr51447.c
+20020412-1.c
+20040308-1.c
+20040423-1.c
+20041218-2.c
+20070919-1.c
+align-nest.c
+pr41935.c
+20050107-1.c
+20050119-1.c
+20050119-2.c
+920302-1.c
+920501-3.c
+920728-1.c
+pr28865.c
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 9eee4a0..09cc53a 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -10,10 +10,8 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "X86AsmInstrumentation.h"
#include "X86Operand.h"
-#include "X86RegisterInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
@@ -118,11 +116,6 @@ bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
-std::string FuncName(unsigned AccessSize, bool IsWrite) {
- return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
- utostr(AccessSize);
-}
-
class X86AddressSanitizer : public X86AsmInstrumentation {
public:
struct RegisterContext {
@@ -136,26 +129,26 @@ public:
public:
RegisterContext(unsigned AddressReg, unsigned ShadowReg,
unsigned ScratchReg) {
- BusyRegs.push_back(convReg(AddressReg, MVT::i64));
- BusyRegs.push_back(convReg(ShadowReg, MVT::i64));
- BusyRegs.push_back(convReg(ScratchReg, MVT::i64));
+ BusyRegs.push_back(convReg(AddressReg, 64));
+ BusyRegs.push_back(convReg(ShadowReg, 64));
+ BusyRegs.push_back(convReg(ScratchReg, 64));
}
- unsigned AddressReg(MVT::SimpleValueType VT) const {
- return convReg(BusyRegs[REG_OFFSET_ADDRESS], VT);
+ unsigned AddressReg(unsigned Size) const {
+ return convReg(BusyRegs[REG_OFFSET_ADDRESS], Size);
}
- unsigned ShadowReg(MVT::SimpleValueType VT) const {
- return convReg(BusyRegs[REG_OFFSET_SHADOW], VT);
+ unsigned ShadowReg(unsigned Size) const {
+ return convReg(BusyRegs[REG_OFFSET_SHADOW], Size);
}
- unsigned ScratchReg(MVT::SimpleValueType VT) const {
- return convReg(BusyRegs[REG_OFFSET_SCRATCH], VT);
+ unsigned ScratchReg(unsigned Size) const {
+ return convReg(BusyRegs[REG_OFFSET_SCRATCH], Size);
}
void AddBusyReg(unsigned Reg) {
if (Reg != X86::NoRegister)
- BusyRegs.push_back(convReg(Reg, MVT::i64));
+ BusyRegs.push_back(convReg(Reg, 64));
}
void AddBusyRegs(const X86Operand &Op) {
@@ -163,36 +156,36 @@ public:
AddBusyReg(Op.getMemIndexReg());
}
- unsigned ChooseFrameReg(MVT::SimpleValueType VT) const {
+ unsigned ChooseFrameReg(unsigned Size) const {
static const MCPhysReg Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
X86::RCX, X86::RDX, X86::RDI,
X86::RSI };
for (unsigned Reg : Candidates) {
if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
- return convReg(Reg, VT);
+ return convReg(Reg, Size);
}
return X86::NoRegister;
}
private:
- unsigned convReg(unsigned Reg, MVT::SimpleValueType VT) const {
- return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, VT);
+ unsigned convReg(unsigned Reg, unsigned Size) const {
+ return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, Size);
}
std::vector<unsigned> BusyRegs;
};
- X86AddressSanitizer(const MCSubtargetInfo &STI)
+ X86AddressSanitizer(const MCSubtargetInfo *&STI)
: X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
- virtual ~X86AddressSanitizer() {}
+ ~X86AddressSanitizer() override {}
// X86AsmInstrumentation implementation:
- virtual void InstrumentAndEmitInstruction(const MCInst &Inst,
- OperandVector &Operands,
- MCContext &Ctx,
- const MCInstrInfo &MII,
- MCStreamer &Out) override {
+ void InstrumentAndEmitInstruction(const MCInst &Inst,
+ OperandVector &Operands,
+ MCContext &Ctx,
+ const MCInstrInfo &MII,
+ MCStreamer &Out) override {
InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
if (RepPrefix)
EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
@@ -240,17 +233,16 @@ public:
protected:
void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
- void EmitLEA(X86Operand &Op, MVT::SimpleValueType VT, unsigned Reg,
- MCStreamer &Out) {
- assert(VT == MVT::i32 || VT == MVT::i64);
+ void EmitLEA(X86Operand &Op, unsigned Size, unsigned Reg, MCStreamer &Out) {
+ assert(Size == 32 || Size == 64);
MCInst Inst;
- Inst.setOpcode(VT == MVT::i32 ? X86::LEA32r : X86::LEA64r);
- Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, VT)));
+ Inst.setOpcode(Size == 32 ? X86::LEA32r : X86::LEA64r);
+ Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, Size)));
Op.addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
- void ComputeMemOperandAddress(X86Operand &Op, MVT::SimpleValueType VT,
+ void ComputeMemOperandAddress(X86Operand &Op, unsigned Size,
unsigned Reg, MCContext &Ctx, MCStreamer &Out);
// Creates new memory operand with Displacement added to an original
@@ -261,13 +253,13 @@ protected:
MCContext &Ctx, int64_t *Residue);
bool is64BitMode() const {
- return STI.getFeatureBits()[X86::Mode64Bit];
+ return STI->getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode() const {
- return STI.getFeatureBits()[X86::Mode32Bit];
+ return STI->getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode() const {
- return STI.getFeatureBits()[X86::Mode16Bit];
+ return STI->getFeatureBits()[X86::Mode16Bit];
}
unsigned getPointerWidth() {
@@ -437,7 +429,7 @@ void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
}
void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
- MVT::SimpleValueType VT,
+ unsigned Size,
unsigned Reg, MCContext &Ctx,
MCStreamer &Out) {
int64_t Displacement = 0;
@@ -450,14 +442,14 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
// Emit Op as is.
if (Displacement == 0) {
- EmitLEA(Op, VT, Reg, Out);
+ EmitLEA(Op, Size, Reg, Out);
return;
}
int64_t Residue;
std::unique_ptr<X86Operand> NewOp =
AddDisplacement(Op, Displacement, Ctx, &Residue);
- EmitLEA(*NewOp, VT, Reg, Out);
+ EmitLEA(*NewOp, Size, Reg, Out);
while (Residue != 0) {
const MCConstantExpr *Disp =
@@ -465,7 +457,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
std::unique_ptr<X86Operand> DispOp =
X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
SMLoc());
- EmitLEA(*DispOp, VT, Reg, Out);
+ EmitLEA(*DispOp, Size, Reg, Out);
Residue -= Disp->getValue();
}
}
@@ -503,16 +495,16 @@ class X86AddressSanitizer32 : public X86AddressSanitizer {
public:
static const long kShadowOffset = 0x20000000;
- X86AddressSanitizer32(const MCSubtargetInfo &STI)
+ X86AddressSanitizer32(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- virtual ~X86AddressSanitizer32() {}
+ ~X86AddressSanitizer32() override {}
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
if (FrameReg == X86::NoRegister)
return FrameReg;
- return getX86SubSuperRegister(FrameReg, MVT::i32);
+ return getX86SubSuperRegister(FrameReg, 32);
}
void SpillReg(MCStreamer &Out, unsigned Reg) {
@@ -535,10 +527,10 @@ public:
OrigSPOffset += 4;
}
- virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
assert(LocalFrameReg != X86::NoRegister);
const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
@@ -558,24 +550,24 @@ public:
MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
}
- SpillReg(Out, RegCtx.AddressReg(MVT::i32));
- SpillReg(Out, RegCtx.ShadowReg(MVT::i32));
- if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
- SpillReg(Out, RegCtx.ScratchReg(MVT::i32));
+ SpillReg(Out, RegCtx.AddressReg(32));
+ SpillReg(Out, RegCtx.ShadowReg(32));
+ if (RegCtx.ScratchReg(32) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(32));
StoreFlags(Out);
}
- virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+ void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
assert(LocalFrameReg != X86::NoRegister);
RestoreFlags(Out);
- if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
- RestoreReg(Out, RegCtx.ScratchReg(MVT::i32));
- RestoreReg(Out, RegCtx.ShadowReg(MVT::i32));
- RestoreReg(Out, RegCtx.AddressReg(MVT::i32));
+ if (RegCtx.ScratchReg(32) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(32));
+ RestoreReg(Out, RegCtx.ShadowReg(32));
+ RestoreReg(Out, RegCtx.AddressReg(32));
unsigned FrameReg = GetFrameReg(Ctx, Out);
if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
@@ -586,18 +578,18 @@ public:
}
}
- virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
- MCStreamer &Out) override;
+ void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
private:
void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
@@ -610,10 +602,11 @@ private:
.addReg(X86::ESP)
.addImm(-16));
EmitInstruction(
- Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(MVT::i32)));
+ Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
- const std::string &Fn = FuncName(AccessSize, IsWrite);
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ (IsWrite ? "store" : "load") +
+ llvm::Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -623,14 +616,14 @@ private:
void X86AddressSanitizer32::InstrumentMemOperandSmall(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
- unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+ unsigned AddressRegI32 = RegCtx.AddressReg(32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
- assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
- unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+ assert(RegCtx.ScratchReg(32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
- ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+ ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
AddressRegI32));
@@ -673,7 +666,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
- EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
+ EmitLEA(*Op, 32, ScratchRegI32, Out);
break;
}
case 4:
@@ -698,10 +691,10 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
void X86AddressSanitizer32::InstrumentMemOperandLarge(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+ unsigned AddressRegI32 = RegCtx.AddressReg(32);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
- ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+ ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
AddressRegI32));
@@ -760,16 +753,16 @@ class X86AddressSanitizer64 : public X86AddressSanitizer {
public:
static const long kShadowOffset = 0x7fff8000;
- X86AddressSanitizer64(const MCSubtargetInfo &STI)
+ X86AddressSanitizer64(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- virtual ~X86AddressSanitizer64() {}
+ ~X86AddressSanitizer64() override {}
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
if (FrameReg == X86::NoRegister)
return FrameReg;
- return getX86SubSuperRegister(FrameReg, MVT::i64);
+ return getX86SubSuperRegister(FrameReg, 64);
}
void SpillReg(MCStreamer &Out, unsigned Reg) {
@@ -792,10 +785,10 @@ public:
OrigSPOffset += 8;
}
- virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
assert(LocalFrameReg != X86::NoRegister);
const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
@@ -816,24 +809,24 @@ public:
}
EmitAdjustRSP(Ctx, Out, -128);
- SpillReg(Out, RegCtx.ShadowReg(MVT::i64));
- SpillReg(Out, RegCtx.AddressReg(MVT::i64));
- if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
- SpillReg(Out, RegCtx.ScratchReg(MVT::i64));
+ SpillReg(Out, RegCtx.ShadowReg(64));
+ SpillReg(Out, RegCtx.AddressReg(64));
+ if (RegCtx.ScratchReg(64) != X86::NoRegister)
+ SpillReg(Out, RegCtx.ScratchReg(64));
StoreFlags(Out);
}
- virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override {
- unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64);
+ void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override {
+ unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64);
assert(LocalFrameReg != X86::NoRegister);
RestoreFlags(Out);
- if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister)
- RestoreReg(Out, RegCtx.ScratchReg(MVT::i64));
- RestoreReg(Out, RegCtx.AddressReg(MVT::i64));
- RestoreReg(Out, RegCtx.ShadowReg(MVT::i64));
+ if (RegCtx.ScratchReg(64) != X86::NoRegister)
+ RestoreReg(Out, RegCtx.ScratchReg(64));
+ RestoreReg(Out, RegCtx.AddressReg(64));
+ RestoreReg(Out, RegCtx.ShadowReg(64));
EmitAdjustRSP(Ctx, Out, 128);
unsigned FrameReg = GetFrameReg(Ctx, Out);
@@ -845,18 +838,18 @@ public:
}
}
- virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
- bool IsWrite,
- const RegisterContext &RegCtx,
- MCContext &Ctx,
- MCStreamer &Out) override;
- virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
- MCStreamer &Out) override;
+ void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+ bool IsWrite,
+ const RegisterContext &RegCtx,
+ MCContext &Ctx,
+ MCStreamer &Out) override;
+ void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+ MCStreamer &Out) override;
private:
void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
@@ -864,7 +857,7 @@ private:
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1,
SMLoc(), SMLoc()));
- EmitLEA(*Op, MVT::i64, X86::RSP, Out);
+ EmitLEA(*Op, 64, X86::RSP, Out);
OrigSPOffset += Offset;
}
@@ -878,12 +871,13 @@ private:
.addReg(X86::RSP)
.addImm(-16));
- if (RegCtx.AddressReg(MVT::i64) != X86::RDI) {
+ if (RegCtx.AddressReg(64) != X86::RDI) {
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
- RegCtx.AddressReg(MVT::i64)));
+ RegCtx.AddressReg(64)));
}
- const std::string &Fn = FuncName(AccessSize, IsWrite);
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ (IsWrite ? "store" : "load") +
+ llvm::Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
@@ -893,16 +887,16 @@ private:
void X86AddressSanitizer64::InstrumentMemOperandSmall(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
- unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
- unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
- unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
- unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+ unsigned AddressRegI64 = RegCtx.AddressReg(64);
+ unsigned AddressRegI32 = RegCtx.AddressReg(32);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
+ unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
+ unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
- assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
- unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+ assert(RegCtx.ScratchReg(32) != X86::NoRegister);
+ unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
- ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+ ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
AddressRegI64));
@@ -944,7 +938,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
SMLoc(), SMLoc()));
- EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
+ EmitLEA(*Op, 32, ScratchRegI32, Out);
break;
}
case 4:
@@ -969,10 +963,10 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall(
void X86AddressSanitizer64::InstrumentMemOperandLarge(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
- unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64);
- unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64);
+ unsigned AddressRegI64 = RegCtx.AddressReg(64);
+ unsigned ShadowRegI64 = RegCtx.ShadowReg(64);
- ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out);
+ ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out);
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg(
AddressRegI64));
@@ -1030,7 +1024,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
} // End anonymous namespace
-X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo &STI)
+X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI)
: STI(STI), InitialFrameReg(0) {}
X86AsmInstrumentation::~X86AsmInstrumentation() {}
@@ -1043,7 +1037,7 @@ void X86AsmInstrumentation::InstrumentAndEmitInstruction(
void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out,
const MCInst &Inst) {
- Out.EmitInstruction(Inst, STI);
+ Out.EmitInstruction(Inst, *STI);
}
unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
@@ -1067,17 +1061,17 @@ unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo &STI) {
- Triple T(STI.getTargetTriple());
+ const MCContext &Ctx, const MCSubtargetInfo *&STI) {
+ Triple T(STI->getTargetTriple());
const bool hasCompilerRTSupport = T.isOSLinux();
if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
MCOptions.SanitizeAddress) {
- if (STI.getFeatureBits()[X86::Mode32Bit] != 0)
+ if (STI->getFeatureBits()[X86::Mode32Bit] != 0)
return new X86AddressSanitizer32(STI);
- if (STI.getFeatureBits()[X86::Mode64Bit] != 0)
+ if (STI->getFeatureBits()[X86::Mode64Bit] != 0)
return new X86AddressSanitizer64(STI);
}
return new X86AsmInstrumentation(STI);
}
-} // End llvm namespace
+} // end llvm namespace
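
The rewrite above drops the FuncName helper and builds the sanitizer callee name from a Twine instead. A standalone C++ sketch of the same naming scheme (asanReportName is an illustrative stand-in; the real code creates an MCSymbol in the MCContext):

#include <cstdio>
#include <string>

// "__asan_report_" + ("store" | "load") + access size in bytes, matching the
// symbol names emitted by EmitCallAsanReport above.
static std::string asanReportName(unsigned AccessSize, bool IsWrite) {
  return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
         std::to_string(AccessSize);
}

int main() {
  std::printf("%s\n", asanReportName(4, /*IsWrite=*/true).c_str());  // __asan_report_store4
  std::printf("%s\n", asanReportName(8, /*IsWrite=*/false).c_str()); // __asan_report_load8
  return 0;
}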
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 19ebcc4..470cead 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -28,7 +28,8 @@ class X86AsmInstrumentation;
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo &STI);
+ const MCContext &Ctx,
+ const MCSubtargetInfo *&STI);
class X86AsmInstrumentation {
public:
@@ -48,15 +49,16 @@ public:
protected:
friend X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo &STI);
+ const MCContext &Ctx,
+ const MCSubtargetInfo *&STI);
- X86AsmInstrumentation(const MCSubtargetInfo &STI);
+ X86AsmInstrumentation(const MCSubtargetInfo *&STI);
unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
void EmitInstruction(MCStreamer &Out, const MCInst &Inst);
- const MCSubtargetInfo &STI;
+ const MCSubtargetInfo *&STI;
unsigned InitialFrameReg;
};
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index bca059d..4d8ffac 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -11,7 +11,6 @@
#include "X86AsmInstrumentation.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
-#include "X86ISelLowering.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
@@ -26,6 +25,7 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -57,10 +57,10 @@ static const char OpPrecedence[] = {
};
class X86AsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
const MCInstrInfo &MII;
ParseInstructionInfo *InstInfo;
std::unique_ptr<X86AsmInstrumentation> Instrumentation;
+
private:
SMLoc consumeToken() {
MCAsmParser &Parser = getParser();
@@ -154,6 +154,7 @@ private:
// Push the new operator.
InfixOperatorStack.push_back(Op);
}
+
int64_t execute() {
// Push any remaining operators onto the postfix stack.
while (!InfixOperatorStack.empty()) {
@@ -268,6 +269,7 @@ private:
bool StopOnLBrac, AddImmPrefix;
InfixCalculator IC;
InlineAsmIdentifierInfo Info;
+
public:
IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
@@ -712,10 +714,10 @@ private:
SMLoc End, unsigned Size, StringRef Identifier,
InlineAsmIdentifierInfo &Info);
+ bool parseDirectiveEven(SMLoc L);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
- bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
bool processInstruction(MCInst &Inst, const OperandVector &Ops);
/// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
@@ -758,23 +760,24 @@ private:
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[X86::Mode64Bit];
+ return getSTI().getFeatureBits()[X86::Mode64Bit];
}
bool is32BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[X86::Mode32Bit];
+ return getSTI().getFeatureBits()[X86::Mode32Bit];
}
bool is16BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return STI.getFeatureBits()[X86::Mode16Bit];
+ return getSTI().getFeatureBits()[X86::Mode16Bit];
}
void SwitchMode(unsigned mode) {
+ MCSubtargetInfo &STI = copySTI();
FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
unsigned FB = ComputeAvailableFeatures(
STI.ToggleFeature(OldMode.flip(mode)));
setAvailableFeatures(FB);
-
+
assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
}
@@ -798,12 +801,12 @@ private:
/// }
public:
- X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
+ X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
+ : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) {
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
Instrumentation.reset(
CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
}
@@ -912,6 +915,11 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == 0)
RegNo = MatchRegisterName(Tok.getString().lower());
+ // The "flags" register cannot be referenced directly.
+ // Treat it as an identifier instead.
+ if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
+ RegNo = 0;
+
if (!is64BitMode()) {
// FIXME: This should be done using Requires<Not64BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
@@ -1042,8 +1050,11 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
.Cases("BYTE", "byte", 8)
.Cases("WORD", "word", 16)
.Cases("DWORD", "dword", 32)
+ .Cases("FWORD", "fword", 48)
.Cases("QWORD", "qword", 64)
+ .Cases("MMWORD","mmword", 64)
.Cases("XWORD", "xword", 80)
+ .Cases("TBYTE", "tbyte", 80)
.Cases("XMMWORD", "xmmword", 128)
.Cases("YMMWORD", "ymmword", 256)
.Cases("ZMMWORD", "zmmword", 512)
@@ -1062,8 +1073,8 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
// Insert an explicit size if the user didn't have one.
if (!Size) {
Size = getPointerWidth();
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
- /*Len=*/0, Size));
+ InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
+ /*Len=*/0, Size);
}
// Create an absolute memory reference in order to match against
@@ -1082,8 +1093,8 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
if (!Size) {
Size = Info.Type * 8; // Size is in terms of bits in this context.
if (Size)
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
- /*Len=*/0, Size));
+ InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
+ /*Len=*/0, Size);
}
}
@@ -1097,13 +1108,13 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
}
static void
-RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
+RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
StringRef SymName, int64_t ImmDisp,
int64_t FinalImmDisp, SMLoc &BracLoc,
SMLoc &StartInBrac, SMLoc &End) {
// Remove the '[' and ']' from the IR string.
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
+ AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
+ AsmRewrites.emplace_back(AOK_Skip, End, 1);
// If ImmDisp is non-zero, then we parsed a displacement before the
// bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
@@ -1114,15 +1125,14 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
// We have an immediate displacement before the bracketed expression.
// Adjust this to match the final immediate displacement.
bool Found = false;
- for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
- E = AsmRewrites->end(); I != E; ++I) {
- if ((*I).Loc.getPointer() > BracLoc.getPointer())
+ for (AsmRewrite &AR : AsmRewrites) {
+ if (AR.Loc.getPointer() > BracLoc.getPointer())
continue;
- if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
+ if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
assert (!Found && "ImmDisp already rewritten.");
- (*I).Kind = AOK_Imm;
- (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
- (*I).Val = FinalImmDisp;
+ AR.Kind = AOK_Imm;
+ AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
+ AR.Val = FinalImmDisp;
Found = true;
break;
}
@@ -1133,28 +1143,27 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
// We have a symbolic and an immediate displacement, but no displacement
// before the bracketed expression. Put the immediate displacement
// before the bracketed expression.
- AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
+ AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
}
}
// Remove all the ImmPrefix rewrites within the brackets.
- for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
- E = AsmRewrites->end(); I != E; ++I) {
- if ((*I).Loc.getPointer() < StartInBrac.getPointer())
+ for (AsmRewrite &AR : AsmRewrites) {
+ if (AR.Loc.getPointer() < StartInBrac.getPointer())
continue;
- if ((*I).Kind == AOK_ImmPrefix)
- (*I).Kind = AOK_Delete;
+ if (AR.Kind == AOK_ImmPrefix)
+ AR.Kind = AOK_Delete;
}
const char *SymLocPtr = SymName.data();
// Skip everything before the symbol.
if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
assert(Len > 0 && "Expected a non-negative length.");
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
+ AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
}
// Skip everything after the symbol.
if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
assert(Len > 0 && "Expected a non-negative length.");
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
+ AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
}
}
@@ -1162,6 +1171,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
+ AsmToken::TokenKind PrevTK = AsmToken::Error;
bool Done = false;
while (!Done) {
bool UpdateLocLex = true;
@@ -1205,7 +1215,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return Error(Tok.getLoc(), "Unexpected identifier!");
} else {
// This is a dot operator, not an adjacent identifier.
- if (Identifier.find('.') != StringRef::npos) {
+ if (Identifier.find('.') != StringRef::npos &&
+ PrevTK == AsmToken::RBrac) {
return false;
} else {
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
@@ -1223,8 +1234,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
case AsmToken::Integer: {
StringRef ErrMsg;
if (isParsingInlineAsm() && SM.getAddImmPrefix())
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
- Tok.getLoc()));
+ InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
// Look for 'b' or 'f' following an Integer as a directional label
SMLoc Loc = getTok().getLoc();
int64_t IntVal = getTok().getIntVal();
@@ -1237,7 +1247,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *Val =
- MCSymbolRefExpr::create(Sym, Variant, getContext());
+ MCSymbolRefExpr::create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
StringRef Identifier = Sym->getName();
@@ -1275,6 +1285,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (!Done && UpdateLocLex)
End = consumeToken();
+
+ PrevTK = TK;
}
return false;
}
@@ -1302,7 +1314,7 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// A symbolic displacement.
Disp = Sym;
if (isParsingInlineAsm())
- RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
+ RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
ImmDisp, SM.getImm(), BracLoc, StartInBrac,
End);
}
@@ -1359,7 +1371,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End) {
MCAsmParser &Parser = getParser();
- assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
+ assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
Val = nullptr;
StringRef LineBuf(Identifier.data());
@@ -1372,15 +1384,17 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
// Advance the token stream until the end of the current token is
// after the end of what the frontend claimed.
const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
- while (true) {
+ do {
End = Tok.getEndLoc();
getLexer().Lex();
-
- assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
- if (End.getPointer() == EndPtr) break;
- }
+ } while (End.getPointer() < EndPtr);
Identifier = LineBuf;
+ // The frontend should end parsing on an assembler token boundary, unless it
+ // failed parsing.
+ assert((End.getPointer() == EndPtr || !Result) &&
+ "frontend claimed part of a token?");
+
// If the identifier lookup was unsuccessful, assume that we are dealing with
// a label.
if (!Result) {
@@ -1389,9 +1403,8 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
Loc, false);
assert(InternalName.size() && "We should have an internal name here.");
// Push a rewrite for replacing the identifier name with the internal name.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
- Identifier.size(),
- InternalName));
+ InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
+ InternalName);
}
// Create the symbol reference.
@@ -1418,8 +1431,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
if (isParsingInlineAsm())
- InstInfo->AsmRewrites->push_back(
- AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
+ InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
if (getLexer().isNot(AsmToken::LBrac)) {
// An immediate following a 'segment register', 'colon' token sequence can
@@ -1588,8 +1600,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
unsigned Len = DotDispStr.size();
unsigned Val = OrigDispVal + DotDispVal;
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
- Val));
+ InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
}
NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
@@ -1613,7 +1624,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
return nullptr;
// Don't emit the offset operator.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
+ InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
// The offset operator will have an 'r' constraint, thus we need to create a
// register operand to ensure proper matching. Just pick a GPR based on
@@ -1664,7 +1675,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
// Rewrite the type operator and the C or C++ type or variable in terms of an
// immediate. E.g. TYPE foo -> $$4
unsigned Len = End.getPointer() - TypeLoc.getPointer();
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
+ InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
return X86Operand::CreateImm(Imm, Start, End);
@@ -1688,12 +1699,14 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
return ParseIntelOperator(IOK_TYPE);
}
+ bool PtrInOperand = false;
unsigned Size = getIntelMemOperandSize(Tok.getString());
if (Size) {
Parser.Lex(); // Eat operand size (e.g., byte, word).
if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
Parser.Lex(); // Eat ptr.
+ PtrInOperand = true;
}
Start = Tok.getLoc();
@@ -1711,10 +1724,10 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
if (StartTok.getString().size() == Len)
// Just add a prefix if this wasn't a complex immediate expression.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
+ InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
else
// Otherwise, rewrite the complex expression as a single immediate.
- InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
+ InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
}
if (getLexer().isNot(AsmToken::LBrac)) {
@@ -1740,7 +1753,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
}
// rounding mode token
- if (STI.getFeatureBits()[X86::FeatureAVX512] &&
+ if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
getLexer().is(AsmToken::LCurly))
return ParseRoundingModeOp(Start, End);
@@ -1749,9 +1762,16 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
if (!ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
// of a segment override, otherwise this is a normal register reference.
- if (getLexer().isNot(AsmToken::Colon))
+ // If it is a normal register and the operand carried a 'ptr' size
+ // qualifier, this is an error.
+ if (getLexer().isNot(AsmToken::Colon)){
+ if (PtrInOperand){
+ return ErrorOperand(Start, "expected memory operand after "
+ "'ptr', found register operand instead");
+ }
return X86Operand::CreateReg(RegNo, Start, End);
-
+ }
+
return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
}
@@ -1798,7 +1818,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
}
case AsmToken::LCurly:{
SMLoc Start = Parser.getTok().getLoc(), End;
- if (STI.getFeatureBits()[X86::FeatureAVX512])
+ if (getSTI().getFeatureBits()[X86::FeatureAVX512])
return ParseRoundingModeOp(Start, End);
return ErrorOperand(Start, "unknown token in expression");
}
@@ -1808,7 +1828,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op) {
MCAsmParser &Parser = getParser();
- if(STI.getFeatureBits()[X86::FeatureAVX512]) {
+ if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
const SMLoc consumedToken = consumeToken();
@@ -1983,12 +2003,13 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
}
// Validate the scale amount.
- if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
+ if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
ScaleVal != 1) {
Error(Loc, "scale factor in 16-bit address must be 1");
return nullptr;
- }
- if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
+ }
+ if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
+ ScaleVal != 8) {
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
return nullptr;
}
@@ -2175,7 +2196,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Name == "repne" || Name == "repnz" ||
Name == "rex64" || Name == "data16";
-
// This does the actual operand parsing. Don't parse any more if we have a
// prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
// just want to parse the "lock" as the first instruction and the "incl" as
@@ -2213,6 +2233,20 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
(isPrefix && getLexer().is(AsmToken::Slash)))
Parser.Lex();
+ // This is for gas compatibility and cannot be done in the .td files.
+ // Add a "p" suffix to certain floating point mnemonics given no arguments.
+ // For example: fsub --> fsubp
+ bool IsFp =
+ Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
+ if (IsFp && Operands.size() == 1) {
+ const char *Repl = StringSwitch<const char *>(Name)
+ .Case("fsub", "fsubp")
+ .Case("fdiv", "fdivp")
+ .Case("fsubr", "fsubrp")
+ .Case("fdivr", "fdivrp");
+ static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
+ }
+
// This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
@@ -2242,9 +2276,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Append default arguments to "ins[bwld]"
if (Name.startswith("ins") && Operands.size() == 1 &&
- (Name == "insb" || Name == "insw" || Name == "insl" ||
- Name == "insd" )) {
- AddDefaultSrcDestOperands(Operands,
+ (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) {
+ AddDefaultSrcDestOperands(Operands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
}
@@ -2346,98 +2379,21 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// instalias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
- if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
- cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
- Operands.erase(Operands.begin() + 1);
- static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
- }
+ if (Op1.isImm())
+ if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
+ if (CE->getValue() == 3) {
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
+ }
}
return false;
}
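
As a side note on the reshuffled "int $3" handling above: the parser now uses a single dyn_cast on the immediate and, when its value is 3, rewrites the mnemonic token and drops the operand so the one-byte int3 encoding is matched. A toy sketch of that shape of canonicalization, using a stand-in Operand type rather than X86Operand/MCConstantExpr (names here are illustrative only):

#include <cassert>
#include <string>
#include <vector>

// Toy operand: either a mnemonic token or an immediate value.
struct Operand {
  bool IsImm;
  long long Imm;
  std::string Token;
};

// If the instruction is "int" with the single immediate 3, rewrite it to the
// one-byte "int3" form and drop the immediate operand, mirroring the
// transformation the parser applies before matching.
static void canonicalizeInt3(std::vector<Operand> &Ops) {
  if (Ops.size() == 2 && Ops[0].Token == "int" && Ops[1].IsImm &&
      Ops[1].Imm == 3) {
    Ops[0].Token = "int3";
    Ops.erase(Ops.begin() + 1);
  }
}

int main() {
  std::vector<Operand> Ops = {{false, 0, "int"}, {true, 3, ""}};
  canonicalizeInt3(Ops);
  assert(Ops.size() == 1 && Ops[0].Token == "int3");
  return 0;
}
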
-static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
- bool isCmp) {
- MCInst TmpInst;
- TmpInst.setOpcode(Opcode);
- if (!isCmp)
- TmpInst.addOperand(MCOperand::createReg(Reg));
- TmpInst.addOperand(MCOperand::createReg(Reg));
- TmpInst.addOperand(Inst.getOperand(0));
- Inst = TmpInst;
- return true;
-}
-
-static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
- bool isCmp = false) {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
-}
-
-static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
- bool isCmp = false) {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
-}
-
-static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
- bool isCmp = false) {
- if (!Inst.getOperand(0).isImm() ||
- !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
- return false;
-
- return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
-}
-
-bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
- switch (Inst.getOpcode()) {
- default: return true;
- case X86::INT:
- X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
- assert(Op.isImm() && "expected immediate");
- int64_t Res;
- if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) {
- Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
- return false;
- }
- return true;
- }
- llvm_unreachable("handle the instruction appropriately");
-}
-
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
switch (Inst.getOpcode()) {
default: return false;
- case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
- case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
- case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
- case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
- case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
- case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
- case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
- case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
- case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
- case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
- case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
- case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
- case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
- case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
- case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
- case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
- case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
- case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
- case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
- case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
- case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
- case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
- case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
- case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
+ case X86::VMOVZPQILo2PQIrr:
case X86::VMOVAPDrr:
case X86::VMOVAPDYrr:
case X86::VMOVAPSrr:
@@ -2457,18 +2413,19 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
- case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
- case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
- case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
- case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
- case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
- case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
- case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
- case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
- case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
- case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
- case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
}
Inst.setOpcode(NewOpc);
return true;
@@ -2573,9 +2530,6 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
isParsingIntelSyntax())) {
default: llvm_unreachable("Unexpected match result!");
case Match_Success:
- if (!validateInstruction(Inst, Operands))
- return true;
-
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the
// individual transformations can chain off each other.
@@ -2819,9 +2773,6 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned NumSuccessfulMatches =
std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
- if (!validateInstruction(Inst, Operands))
- return true;
-
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the individual
// transformations can chain off each other.
@@ -2898,10 +2849,29 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
"a '%' prefix in .intel_syntax");
}
return false;
- }
+ } else if (IDVal == ".even")
+ return parseDirectiveEven(DirectiveID.getLoc());
return true;
}
+/// parseDirectiveEven
+/// ::= .even
+bool X86AsmParser::parseDirectiveEven(SMLoc L) {
+ const MCSection *Section = getStreamer().getCurrentSection().first;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ TokError("unexpected token in directive");
+ return false;
+ }
+ if (!Section) {
+ getStreamer().InitSections(false);
+ Section = getStreamer().getCurrentSection().first;
+ }
+ if (Section->UseCodeAlign())
+ getStreamer().EmitCodeAlignment(2, 0);
+ else
+ getStreamer().EmitValueToAlignment(2, 0, 1, 0);
+ return false;
+}
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
@@ -2909,10 +2879,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
if (getParser().parseExpression(Value))
return false;
- getParser().getStreamer().EmitValue(Value, Size);
+ if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else {
+ getStreamer().EmitValue(Value, Size, ExprLoc);
+ }
if (getLexer().is(AsmToken::EndOfStatement))
break;
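
On the .word literal range check added above: a constant is accepted if it fits in Size bytes either as an unsigned or as a signed value, otherwise the directive reports "literal value out of range". A minimal standalone sketch of that predicate, using hand-rolled helpers in place of llvm::isIntN/isUIntN (the names below are illustrative, not the commit's code):

#include <cassert>
#include <cstdint>

// Returns true if Value fits in N bits as an unsigned integer.
static bool fitsUIntN(unsigned N, uint64_t Value) {
  return N >= 64 || Value <= (UINT64_C(1) << N) - 1;
}

// Returns true if Value, reinterpreted as signed, fits in N bits.
static bool fitsIntN(unsigned N, uint64_t Value) {
  int64_t S = static_cast<int64_t>(Value);
  return N >= 64 || (S >= -(INT64_C(1) << (N - 1)) &&
                     S < (INT64_C(1) << (N - 1)));
}

// Mirrors the directive check: a literal is acceptable for a Size-byte
// directive if it is representable either unsigned or signed in 8*Size bits.
static bool literalFitsDirective(unsigned Size, uint64_t Value) {
  assert(Size >= 1 && Size <= 8 && "Invalid size");
  return fitsUIntN(8 * Size, Value) || fitsIntN(8 * Size, Value);
}

int main() {
  assert(literalFitsDirective(2, 0xFFFF));       // ok as unsigned 16-bit
  assert(literalFitsDirective(2, uint64_t(-1))); // ok as signed 16-bit (-1)
  assert(!literalFitsDirective(2, 0x10000));     // rejected: needs 17 bits
  return 0;
}
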
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
index 7610806..54538c8 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParserCommon.h
@@ -13,30 +13,25 @@
namespace llvm {
inline bool isImmSExti16i8Value(uint64_t Value) {
- return (( Value <= 0x000000000000007FULL)||
- (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<8>(Value) ||
+ (isUInt<16>(Value) && isInt<8>(static_cast<int16_t>(Value)));
}
inline bool isImmSExti32i8Value(uint64_t Value) {
- return (( Value <= 0x000000000000007FULL)||
- (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<8>(Value) ||
+ (isUInt<32>(Value) && isInt<8>(static_cast<int32_t>(Value)));
}
inline bool isImmSExti64i8Value(uint64_t Value) {
- return (( Value <= 0x000000000000007FULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<8>(Value);
}
inline bool isImmSExti64i32Value(uint64_t Value) {
- return (( Value <= 0x000000007FFFFFFFULL)||
- (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isInt<32>(Value);
}
inline bool isImmUnsignedi8Value(uint64_t Value) {
- return (( Value <= 0x00000000000000FFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isUInt<8>(Value) || isInt<8>(Value);
}
} // End of namespace llvm
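
To see why the rewritten isImmSExti16i8Value above is equivalent to the old explicit hex ranges: a value qualifies if it is already an 8-bit signed immediate, or if it is a 16-bit quantity whose low 16 bits, viewed as signed, fit in 8 bits. A small self-contained sketch, reimplementing the isInt/isUInt helpers locally for illustration rather than including MathExtras.h:

#include <cassert>
#include <cstdint>

// Local stand-ins for llvm::isInt<8> / llvm::isUInt<16>, for illustration.
static bool isInt8(int64_t V)    { return V >= -128 && V <= 127; }
static bool isUInt16(uint64_t V) { return V <= 0xFFFFULL; }

// Same shape as the new isImmSExti16i8Value: either the full 64-bit value is
// an 8-bit signed immediate, or it is a 16-bit value whose sign-extended low
// byte covers it (i.e. 0xFF80..0xFFFF).
static bool immSExti16i8(uint64_t Value) {
  return isInt8(static_cast<int64_t>(Value)) ||
         (isUInt16(Value) && isInt8(static_cast<int16_t>(Value)));
}

int main() {
  assert(immSExti16i8(0x7F));                  // 127: fits directly
  assert(immSExti16i8(0xFF80));                // -128 as a 16-bit immediate
  assert(immSExti16i8(0xFFFFFFFFFFFFFFFFULL)); // -1 as a 64-bit immediate
  assert(!immSExti16i8(0x80));                 // 128: needs a wider encoding
  return 0;
}
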
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index cfc3ee2..ce8fcf1 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -95,11 +95,13 @@ X86GenericDisassembler::X86GenericDisassembler(
llvm_unreachable("Invalid CPU mode");
}
+namespace {
struct Region {
ArrayRef<uint8_t> Bytes;
uint64_t Base;
Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {}
};
+} // end anonymous namespace
/// A callback function that wraps the readByte method from Region.
///
@@ -831,8 +833,12 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM256:
case TYPE_XMM512:
case TYPE_VK1:
+ case TYPE_VK2:
+ case TYPE_VK4:
case TYPE_VK8:
case TYPE_VK16:
+ case TYPE_VK32:
+ case TYPE_VK64:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
case TYPE_BNDR:
@@ -962,6 +968,7 @@ static bool translateInstruction(MCInst &mcInst,
return true;
}
+ mcInst.clear();
mcInst.setOpcode(insn.instructionID);
// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
// prefix bytes should be disassembled as xrelease and xacquire then set the
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index f73fa75..040143b 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -361,7 +361,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
* then it should be disassembled as a xacquire/xrelease not repne/rep.
*/
if ((byte == 0xf2 || byte == 0xf3) &&
- ((nextByte == 0xf0) |
+ ((nextByte == 0xf0) ||
((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
insn->xAcquireRelease = true;
/*
@@ -980,6 +980,47 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
insn->opcode == 0xE3)
attrMask ^= ATTR_ADSIZE;
+ /*
+ * In 64-bit mode, all "f64"-superscripted opcodes ignore the 0x66 operand size
+ * prefix: CALL/JMP/Jcc instructions must ignore it and consume 4-byte operands.
+ */
+
+ if (insn->mode == MODE_64BIT &&
+ isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) {
+ switch (insn->opcode) {
+ case 0xE8:
+ case 0xE9:
+ // Take care of psubsb and other mmx instructions.
+ if (insn->opcodeType == ONEBYTE) {
+ attrMask ^= ATTR_OPSIZE;
+ insn->immediateSize = 4;
+ insn->displacementSize = 4;
+ }
+ break;
+ case 0x82:
+ case 0x83:
+ case 0x84:
+ case 0x85:
+ case 0x86:
+ case 0x87:
+ case 0x88:
+ case 0x89:
+ case 0x8A:
+ case 0x8B:
+ case 0x8C:
+ case 0x8D:
+ case 0x8E:
+ case 0x8F:
+ // Take care of lea and three byte ops.
+ if (insn->opcodeType == TWOBYTE) {
+ attrMask ^= ATTR_OPSIZE;
+ insn->immediateSize = 4;
+ insn->displacementSize = 4;
+ }
+ break;
+ }
+ }
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
@@ -1447,8 +1488,12 @@ static int readModRM(struct InternalInstruction* insn) {
case TYPE_XMM: \
return prefix##_XMM0 + index; \
case TYPE_VK1: \
+ case TYPE_VK2: \
+ case TYPE_VK4: \
case TYPE_VK8: \
case TYPE_VK16: \
+ case TYPE_VK32: \
+ case TYPE_VK64: \
if (index > 7) \
*valid = 0; \
return prefix##_K0 + index; \
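
The readPrefixes change earlier in this file's diff only swaps a bitwise '|' for a logical '||' in the xacquire/xrelease heuristic; the boolean result is unchanged, but '||' short-circuits and matches the intent. A freestanding approximation of the predicate, with the opcode-pattern constants written out (this is a sketch, not the decoder's code):

#include <cassert>
#include <cstdint>

// Roughly: an 0xF2/0xF3 prefix is treated as xacquire/xrelease when the next
// byte is LOCK (0xF0), an XCHG r/m form (0x86/0x87), or a 0x90..0x97
// XCHG-with-rAX encoding.
static bool looksLikeXAcquireRelease(uint8_t Prefix, uint8_t NextByte) {
  return (Prefix == 0xF2 || Prefix == 0xF3) &&
         ((NextByte == 0xF0) ||           // lock
          ((NextByte & 0xFE) == 0x86 ||   // xchg r/m, r
           (NextByte & 0xF8) == 0x90));   // xchg rAX, r
}

int main() {
  assert(looksLikeXAcquireRelease(0xF3, 0xF0));  // xrelease lock ...
  assert(looksLikeXAcquireRelease(0xF2, 0x87));  // xacquire xchg ...
  assert(!looksLikeXAcquireRelease(0xF3, 0x89)); // plain rep-prefixed mov
  return 0;
}
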
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index a79a923..28a628e 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -572,8 +572,6 @@ struct InternalInstruction {
// The last byte of the opcode, not counting any ModR/M extension
uint8_t opcode;
- // The ModR/M byte of the instruction, if it is an opcode extension
- uint8_t modRMExtension;
// decode state
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index ea727e6..b4c0bc4 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 62b6b73..bbb3090 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -15,12 +15,9 @@
#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
-class MCOperand;
-
class X86ATTInstPrinter final : public MCInstPrinter {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 91b144a..82f0ee5 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -21,6 +21,27 @@
using namespace llvm;
+static unsigned getVectorRegSize(unsigned RegNo) {
+ if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
+ return 512;
+ if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31)
+ return 256;
+ if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31)
+ return 128;
+ if (X86::MM0 <= RegNo && RegNo <= X86::MM7)
+ return 64;
+
+ llvm_unreachable("Unknown vector reg!");
+ return 0;
+}
+
+static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT,
+ unsigned OperandIndex) {
+ unsigned OpReg = MI->getOperand(OperandIndex).getReg();
+ return MVT::getVectorVT(ScalarVT,
+ getVectorRegSize(OpReg)/ScalarVT.getSizeInBits());
+}
+
/// \brief Extracts the src/dst types for a given zero extension instruction.
/// \note While the number of elements in the DstVT type is correct, the
/// number in the SrcVT type is expanded to fill the src xmm register and the
@@ -107,6 +128,75 @@ static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
}
}
+#define CASE_MASK_INS_COMMON(Inst, Suffix, src) \
+ case X86::V##Inst##Suffix##src: \
+ case X86::V##Inst##Suffix##src##k: \
+ case X86::V##Inst##Suffix##src##kz:
+
+#define CASE_SSE_INS_COMMON(Inst, src) \
+ case X86::Inst##src:
+
+#define CASE_AVX_INS_COMMON(Inst, Suffix, src) \
+ case X86::V##Inst##Suffix##src:
+
+#define CASE_MOVDUP(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src) \
+ CASE_AVX_INS_COMMON(Inst, , r##src) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src) \
+ CASE_SSE_INS_COMMON(Inst, r##src) \
+
+#define CASE_UNPCK(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src) \
+ CASE_AVX_INS_COMMON(Inst, , r##src) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src) \
+ CASE_SSE_INS_COMMON(Inst, r##src) \
+
+#define CASE_SHUF(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \
+ CASE_AVX_INS_COMMON(Inst, , r##src##i) \
+ CASE_AVX_INS_COMMON(Inst, Y, r##src##i) \
+ CASE_SSE_INS_COMMON(Inst, r##src##i) \
+
+#define CASE_VPERM(Inst, src) \
+ CASE_MASK_INS_COMMON(Inst, Z, src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z256, src##i) \
+ CASE_MASK_INS_COMMON(Inst, Z128, src##i) \
+ CASE_AVX_INS_COMMON(Inst, , src##i) \
+ CASE_AVX_INS_COMMON(Inst, Y, src##i) \
+
+#define CASE_VSHUF(Inst, src) \
+ CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \
+ CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \
+ CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \
+ CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i) \
+
+/// \brief Extracts the type, and whether there is a memory operand, for a given
+/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) instruction.
+static void getVSHUF64x2FamilyInfo(const MCInst *MI, MVT &VT, bool &HasMemOp) {
+ HasMemOp = false;
+ switch (MI->getOpcode()) {
+ default:
+ llvm_unreachable("Unknown VSHUF64x2 family instructions.");
+ break;
+ CASE_VSHUF(64X2, m)
+ HasMemOp = true; // FALL THROUGH.
+ CASE_VSHUF(64X2, r)
+ VT = getRegOperandVectorVT(MI, MVT::i64, 0);
+ break;
+ CASE_VSHUF(32X4, m)
+ HasMemOp = true; // FALL THROUGH.
+ CASE_VSHUF(32X4, r)
+ VT = getRegOperandVectorVT(MI, MVT::i32, 0);
+ break;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Top Level Entrypoint
//===----------------------------------------------------------------------===//
@@ -127,23 +217,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::BLENDPDrri:
case X86::VBLENDPDrri:
+ case X86::VBLENDPDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::BLENDPDrmi:
case X86::VBLENDPDrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v2f64,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VBLENDPDYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VBLENDPDYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v4f64,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -152,23 +233,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::BLENDPSrri:
case X86::VBLENDPSrri:
+ case X86::VBLENDPSYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::BLENDPSrmi:
case X86::VBLENDPSrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v4f32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VBLENDPSYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VBLENDPSYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v8f32,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -177,23 +249,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PBLENDWrri:
case X86::VPBLENDWrri:
+ case X86::VPBLENDWYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PBLENDWrmi:
case X86::VPBLENDWrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VPBLENDWYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VPBLENDWYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v16i16,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i16, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -201,23 +264,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
break;
case X86::VPBLENDDrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPBLENDDrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v4i32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
-
case X86::VPBLENDDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
+ case X86::VPBLENDDrmi:
case X86::VPBLENDDYrmi:
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeBLENDMask(MVT::v8i32,
+ DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -239,6 +292,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MOVLHPSrr:
case X86::VMOVLHPSrr:
+ case X86::VMOVLHPSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -247,569 +301,327 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MOVHLPSrr:
case X86::VMOVHLPSrr:
+ case X86::VMOVHLPSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
DecodeMOVHLPSMask(2, ShuffleMask);
break;
- case X86::MOVSLDUPrr:
- case X86::VMOVSLDUPrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::MOVSLDUPrm:
- case X86::VMOVSLDUPrm:
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask);
- break;
-
- case X86::VMOVSHDUPYrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::VMOVSHDUPYrm:
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask);
- break;
-
- case X86::VMOVSLDUPYrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_MOVDUP(MOVSLDUP, r)
+ Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
// FALL THROUGH.
- case X86::VMOVSLDUPYrm:
+ CASE_MOVDUP(MOVSLDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask);
+ DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
break;
- case X86::MOVSHDUPrr:
- case X86::VMOVSHDUPrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_MOVDUP(MOVSHDUP, r)
+ Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
// FALL THROUGH.
- case X86::MOVSHDUPrm:
- case X86::VMOVSHDUPrm:
+ CASE_MOVDUP(MOVSHDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask);
+ DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
break;
- case X86::VMOVDDUPYrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_MOVDUP(MOVDDUP, r)
+ Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg());
// FALL THROUGH.
- case X86::VMOVDDUPYrm:
+ CASE_MOVDUP(MOVDDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask);
- break;
-
- case X86::MOVDDUPrr:
- case X86::VMOVDDUPrr:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::MOVDDUPrm:
- case X86::VMOVDDUPrm:
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask);
+ DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
break;
case X86::PSLLDQri:
case X86::VPSLLDQri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSLLDQMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
-
case X86::VPSLLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSLLDQMask(MVT::v32i8,
+ DecodePSLLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PSRLDQri:
case X86::VPSRLDQri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSRLDQMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
-
case X86::VPSRLDQYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSRLDQMask(MVT::v32i8,
+ DecodePSRLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PALIGNR128rr:
case X86::VPALIGNR128rr:
+ case X86::VPALIGNR256rr:
Src1Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PALIGNR128rm:
case X86::VPALIGNR128rm:
- Src2Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePALIGNRMask(MVT::v16i8,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPALIGNR256rr:
- Src1Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
case X86::VPALIGNR256rm:
Src2Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePALIGNRMask(MVT::v32i8,
+ DecodePALIGNRMask(getRegOperandVectorVT(MI, MVT::i8, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PSHUFDri:
case X86::VPSHUFDri:
+ case X86::VPSHUFDYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFDmi:
case X86::VPSHUFDmi:
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v4i32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPSHUFDYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
case X86::VPSHUFDYmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v8i32,
+ DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::i32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
case X86::PSHUFHWri:
case X86::VPSHUFHWri:
+ case X86::VPSHUFHWYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFHWmi:
case X86::VPSHUFHWmi:
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFHWMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPSHUFHWYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
case X86::VPSHUFHWYmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFHWMask(MVT::v16i16,
+ DecodePSHUFHWMask(getRegOperandVectorVT(MI, MVT::i16, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
+
case X86::PSHUFLWri:
case X86::VPSHUFLWri:
+ case X86::VPSHUFLWYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFLWmi:
case X86::VPSHUFLWmi:
- DestName = getRegName(MI->getOperand(0).getReg());
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFLWMask(MVT::v8i16,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- break;
- case X86::VPSHUFLWYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
case X86::VPSHUFLWYmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFLWMask(MVT::v16i16,
+ DecodePSHUFLWMask(getRegOperandVectorVT(MI, MVT::i16, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
break;
- case X86::PUNPCKHBWrr:
- case X86::VPUNPCKHBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHBWrm:
- case X86::VPUNPCKHBWrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
- break;
- case X86::VPUNPCKHBWYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHBWYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
- break;
- case X86::PUNPCKHWDrr:
- case X86::VPUNPCKHWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHWDrm:
- case X86::VPUNPCKHWDrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
- break;
- case X86::VPUNPCKHWDYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHWDYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
- break;
- case X86::PUNPCKHDQrr:
- case X86::VPUNPCKHDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHDQrm:
- case X86::VPUNPCKHDQrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
- break;
- case X86::VPUNPCKHDQYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHDQYrm:
+ case X86::MMX_PSHUFWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
- break;
- case X86::VPUNPCKHDQZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKHDQZrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v16i32, ShuffleMask);
- break;
- case X86::PUNPCKHQDQrr:
- case X86::VPUNPCKHQDQrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKHQDQrm:
- case X86::VPUNPCKHQDQrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::MMX_PSHUFWmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
- break;
- case X86::VPUNPCKHQDQYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHQDQYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
- break;
- case X86::VPUNPCKHQDQZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKHQDQZrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKHMask(MVT::v8i64, ShuffleMask);
+ if (MI->getOperand(MI->getNumOperands() - 1).isImm())
+ DecodePSHUFMask(MVT::v4i16,
+ MI->getOperand(MI->getNumOperands() - 1).getImm(),
+ ShuffleMask);
break;
- case X86::PUNPCKLBWrr:
- case X86::VPUNPCKLBWrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::PUNPCKLBWrm:
- case X86::VPUNPCKLBWrm:
+ case X86::PSWAPDrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
- break;
- case X86::VPUNPCKLBWYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VPUNPCKLBWYrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
- break;
- case X86::PUNPCKLWDrr:
- case X86::VPUNPCKLWDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKLWDrm:
- case X86::VPUNPCKLWDrm:
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::PSWAPDrm:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ DecodePSWAPMask(MVT::v2i32, ShuffleMask);
break;
- case X86::VPUNPCKLWDYrr:
+
+ CASE_UNPCK(PUNPCKHBW, r)
+ case X86::MMX_PUNPCKHBWirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLWDYrm:
+ CASE_UNPCK(PUNPCKHBW, m)
+ case X86::MMX_PUNPCKHBWirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask);
break;
- case X86::PUNPCKLDQrr:
- case X86::VPUNPCKLDQrr:
+
+ CASE_UNPCK(PUNPCKHWD, r)
+ case X86::MMX_PUNPCKHWDirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKLDQrm:
- case X86::VPUNPCKLDQrm:
+ CASE_UNPCK(PUNPCKHWD, m)
+ case X86::MMX_PUNPCKHWDirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask);
break;
- case X86::VPUNPCKLDQYrr:
+
+ CASE_UNPCK(PUNPCKHDQ, r)
+ case X86::MMX_PUNPCKHDQirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLDQYrm:
+ CASE_UNPCK(PUNPCKHDQ, m)
+ case X86::MMX_PUNPCKHDQirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask);
break;
- case X86::VPUNPCKLDQZrr:
+
+ CASE_UNPCK(PUNPCKHQDQ, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLDQZrm:
+ CASE_UNPCK(PUNPCKHQDQ, m)
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v16i32, ShuffleMask);
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
break;
- case X86::PUNPCKLQDQrr:
- case X86::VPUNPCKLQDQrr:
+
+ CASE_UNPCK(PUNPCKLBW, r)
+ case X86::MMX_PUNPCKLBWirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::PUNPCKLQDQrm:
- case X86::VPUNPCKLQDQrm:
+ CASE_UNPCK(PUNPCKLBW, m)
+ case X86::MMX_PUNPCKLBWirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask);
break;
- case X86::VPUNPCKLQDQYrr:
+
+ CASE_UNPCK(PUNPCKLWD, r)
+ case X86::MMX_PUNPCKLWDirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLQDQYrm:
+ CASE_UNPCK(PUNPCKLWD, m)
+ case X86::MMX_PUNPCKLWDirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask);
break;
- case X86::VPUNPCKLQDQZrr:
+
+ CASE_UNPCK(PUNPCKLDQ, r)
+ case X86::MMX_PUNPCKLDQirr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VPUNPCKLQDQZrm:
+ CASE_UNPCK(PUNPCKLDQ, m)
+ case X86::MMX_PUNPCKLDQirm:
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
- DecodeUNPCKLMask(MVT::v8i64, ShuffleMask);
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask);
break;
- case X86::SHUFPDrri:
- case X86::VSHUFPDrri:
+ CASE_UNPCK(PUNPCKLQDQ, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::SHUFPDrmi:
- case X86::VSHUFPDrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v2f64,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VSHUFPDYrri:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VSHUFPDYrmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v4f64,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
+ CASE_UNPCK(PUNPCKLQDQ, m)
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask);
break;
- case X86::SHUFPSrri:
- case X86::VSHUFPSrri:
+ CASE_SHUF(SHUFPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::SHUFPSrmi:
- case X86::VSHUFPSrmi:
+ CASE_SHUF(SHUFPD, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v4f32,
+ DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VSHUFPSYrri:
+
+ CASE_SHUF(SHUFPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VSHUFPSYrmi:
+ CASE_SHUF(SHUFPS, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodeSHUFPMask(MVT::v8f32,
+ DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::UNPCKLPDrr:
- case X86::VUNPCKLPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPDrm:
- case X86::VUNPCKLPDrm:
- DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPDYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPDYrm:
- DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPDZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPDZrm:
- DecodeUNPCKLMask(MVT::v8f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::UNPCKLPSrr:
- case X86::VUNPCKLPSrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKLPSrm:
- case X86::VUNPCKLPSrm:
- DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPSYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPSYrm:
- DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKLPSZrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKLPSZrm:
- DecodeUNPCKLMask(MVT::v16f32, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::UNPCKHPDrr:
- case X86::VUNPCKHPDrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::UNPCKHPDrm:
- case X86::VUNPCKHPDrm:
- DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VUNPCKHPDYrr:
- Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
- case X86::VUNPCKHPDYrm:
- DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ CASE_VSHUF(64X2, r)
+ CASE_VSHUF(64X2, m)
+ CASE_VSHUF(32X4, r)
+ CASE_VSHUF(32X4, m) {
+ MVT VT;
+ bool HasMemOp;
+ unsigned NumOp = MI->getNumOperands();
+ getVSHUF64x2FamilyInfo(MI, VT, HasMemOp);
+ decodeVSHUF64x2FamilyMask(VT, MI->getOperand(NumOp - 1).getImm(),
+ ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
+ if (HasMemOp) {
+ assert((NumOp >= 8) && "Expected at least 8 operands!");
+ Src1Name = getRegName(MI->getOperand(NumOp - 7).getReg());
+ } else {
+ assert((NumOp >= 4) && "Expected at least 4 operands!");
+ Src2Name = getRegName(MI->getOperand(NumOp - 2).getReg());
+ Src1Name = getRegName(MI->getOperand(NumOp - 3).getReg());
+ }
break;
- case X86::VUNPCKHPDZrr:
+ }
+
+ CASE_UNPCK(UNPCKLPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VUNPCKHPDZrm:
- DecodeUNPCKHMask(MVT::v8f64, ShuffleMask);
+ CASE_UNPCK(UNPCKLPD, m)
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::UNPCKHPSrr:
- case X86::VUNPCKHPSrr:
+
+ CASE_UNPCK(UNPCKLPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::UNPCKHPSrm:
- case X86::VUNPCKHPSrm:
- DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
+ CASE_UNPCK(UNPCKLPS, m)
+ DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VUNPCKHPSYrr:
+
+ CASE_UNPCK(UNPCKHPD, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VUNPCKHPSYrm:
- DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
+ CASE_UNPCK(UNPCKHPD, m)
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VUNPCKHPSZrr:
+
+ CASE_UNPCK(UNPCKHPS, r)
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
- case X86::VUNPCKHPSZrm:
- DecodeUNPCKHMask(MVT::v16f32, ShuffleMask);
+ CASE_UNPCK(UNPCKHPS, m)
+ DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VPERMILPSri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::VPERMILPSmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v4f32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VPERMILPSYri:
- Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
- case X86::VPERMILPSYmi:
- if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v8f32,
- MI->getOperand(MI->getNumOperands() - 1).getImm(),
- ShuffleMask);
- DestName = getRegName(MI->getOperand(0).getReg());
- break;
- case X86::VPERMILPDri:
+
+ CASE_VPERM(PERMILPS, r)
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
- case X86::VPERMILPDmi:
+ CASE_VPERM(PERMILPS, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v2f64,
+ DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
- case X86::VPERMILPDYri:
+
+ CASE_VPERM(PERMILPD, r)
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
- case X86::VPERMILPDYmi:
+ CASE_VPERM(PERMILPD, m)
if (MI->getOperand(MI->getNumOperands() - 1).isImm())
- DecodePSHUFMask(MVT::v4f64,
+ DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0),
MI->getOperand(MI->getNumOperands() - 1).getImm(),
ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::VPERM2F128rr:
case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -824,6 +636,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(1).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::VPERMQYri:
case X86::VPERMPDYri:
Src1Name = getRegName(MI->getOperand(1).getReg());
@@ -846,6 +659,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::MOVSSrr:
case X86::VMOVSSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
@@ -861,6 +675,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MOVZPQILo2PQIrr:
case X86::VMOVPQI2QIrr:
case X86::VMOVZPQILo2PQIrr:
+ case X86::VMOVZPQILo2PQIZrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::MOVQI2PQIrm:
@@ -869,9 +684,11 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVQI2PQIrm:
case X86::VMOVZQI2PQIrm:
case X86::VMOVZPQILo2PQIrm:
+ case X86::VMOVZPQILo2PQIZrm:
DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+
case X86::MOVDI2PDIrm:
case X86::VMOVDI2PDIrm:
DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
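
The getRegOperandVectorVT helper introduced near the top of this file's diff derives the comment's vector type from an operand's register class: register width in bits divided by the scalar width gives the element count (a YMM operand with an f32 scalar yields an 8 x f32 type, for example). A small arithmetic sketch of that mapping, independent of the MVT machinery:

#include <cassert>

// Register width in bits for the vector register files the helper handles.
enum RegWidth { MM = 64, XMM = 128, YMM = 256, ZMM = 512 };

// Element count for a shuffle-decode type: register bits / scalar bits,
// mirroring getVectorRegSize(Reg) / ScalarVT.getSizeInBits().
static unsigned numElements(RegWidth Reg, unsigned ScalarBits) {
  return static_cast<unsigned>(Reg) / ScalarBits;
}

int main() {
  assert(numElements(XMM, 64) == 2);  // v2f64 for SHUFPD on XMM
  assert(numElements(YMM, 32) == 8);  // v8f32 for VBLENDPSY
  assert(numElements(ZMM, 16) == 32); // v32i16 for AVX-512 word shuffles
  assert(numElements(MM, 16) == 4);   // v4i16 for MMX PSHUFW
  return 0;
}
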
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6e371da..20cd7ff 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -19,8 +19,6 @@
namespace llvm {
-class MCOperand;
-
class X86IntelInstPrinter final : public MCInstPrinter {
public:
X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 629802f..133bd0e 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -69,15 +69,19 @@ public:
class X86AsmBackend : public MCAsmBackend {
const StringRef CPU;
bool HasNopl;
- const uint64_t MaxNopLength;
+ uint64_t MaxNopLength;
public:
- X86AsmBackend(const Target &T, StringRef CPU)
- : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
+ X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
CPU != "c3" && CPU != "c3-2";
+ // The maximum length of a true long nop instruction is 15 bytes.
+ // The maximum length of a long nop replacement sequence is 7 bytes.
+ // On Silvermont the maximum nop length is reduced to account for its
+ // microarchitectural characteristics and achieve better performance.
+ MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;
}
unsigned getNumFixupKinds() const override {
@@ -200,6 +204,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
case X86::ADD64ri8: return X86::ADD64ri32;
case X86::ADD64mi8: return X86::ADD64mi32;
+ // ADC
+ case X86::ADC16ri8: return X86::ADC16ri;
+ case X86::ADC16mi8: return X86::ADC16mi;
+ case X86::ADC32ri8: return X86::ADC32ri;
+ case X86::ADC32mi8: return X86::ADC32mi;
+ case X86::ADC64ri8: return X86::ADC64ri32;
+ case X86::ADC64mi8: return X86::ADC64mi32;
+
// SUB
case X86::SUB16ri8: return X86::SUB16ri;
case X86::SUB16mi8: return X86::SUB16mi;
@@ -208,6 +220,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
case X86::SUB64ri8: return X86::SUB64ri32;
case X86::SUB64mi8: return X86::SUB64mi32;
+ // SBB
+ case X86::SBB16ri8: return X86::SBB16ri;
+ case X86::SBB16mi8: return X86::SBB16mi;
+ case X86::SBB32ri8: return X86::SBB32ri;
+ case X86::SBB32mi8: return X86::SBB32mi;
+ case X86::SBB64ri8: return X86::SBB64ri32;
+ case X86::SBB64mi8: return X86::SBB64mi32;
+
// CMP
case X86::CMP16ri8: return X86::CMP16ri;
case X86::CMP16mi8: return X86::CMP16mi;
@@ -279,7 +299,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- static const uint8_t Nops[10][10] = {
+ static const uint8_t TrueNops[10][10] = {
// nop
{0x90},
// xchg %ax,%ax
@@ -302,17 +322,31 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
{0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
};
- // This CPU doesn't support long nops. If needed add more.
- // FIXME: Can we get this from the subtarget somehow?
- // FIXME: We could generated something better than plain 0x90.
- if (!HasNopl) {
- for (uint64_t i = 0; i < Count; ++i)
- OW->write8(0x90);
- return true;
- }
+ // Alternative nop instructions for CPUs which don't support long nops.
+ static const uint8_t AltNops[7][10] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // lea 0x0(%esi),%esi
+ {0x8d, 0x76, 0x00},
+ // lea 0x0(%esi),%esi
+ {0x8d, 0x74, 0x26, 0x00},
+ // nop + lea 0x0(%esi),%esi
+ {0x90, 0x8d, 0x74, 0x26, 0x00},
+ // lea 0x0(%esi),%esi
+ {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00 },
+ // lea 0x0(%esi),%esi
+ {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00},
+ };
+
+ // Select the right NOP table.
+  // FIXME: Can we query whether the CPU supports long nops from the subtarget?
+ const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops;
+ assert(HasNopl || MaxNopLength <= 7);
- // 15 is the longest single nop instruction. Emit as many 15-byte nops as
- // needed, then emit a nop of the remaining length.
+ // Emit as many largest nops as needed, then emit a nop of the remaining
+ // length.
do {
const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
@@ -359,6 +393,17 @@ public:
}
};
+class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
+ ELF::EM_IAMCU);
+ }
+};
+
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
@@ -610,13 +655,13 @@ private:
/// \brief Get the compact unwind number for a given register. The number
/// corresponds to the enum lists in compact_unwind_encoding.h.
int getCompactUnwindRegNum(unsigned Reg) const {
- static const uint16_t CU32BitRegs[7] = {
+ static const MCPhysReg CU32BitRegs[7] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
- static const uint16_t CU64BitRegs[] = {
+ static const MCPhysReg CU64BitRegs[] = {
X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
};
- const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+ const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
@@ -780,6 +825,10 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
return new WindowsX86AsmBackend(T, false, CPU);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+
+ if (TheTriple.isOSIAMCU())
+ return new ELFX86_IAMCUAsmBackend(T, OSABI, CPU);
+
return new ELFX86_32AsmBackend(T, OSABI, CPU);
}
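
A minimal editorial sketch of the padding strategy the hunk above implements: writeNopData() now selects a nop table based on HasNopl and then greedily emits the largest nop the CPU supports, finishing with one shorter nop for the remainder. The helper name, the plain 0x90 filler, and the hard-coded lengths below are assumptions standing in for the real TrueNops/AltNops tables and the MCObjectWriter plumbing.

#include <algorithm>
#include <cstdint>
#include <vector>

// Greedy nop padding: emit the largest supported nop repeatedly, then one
// final nop covering whatever is left (mirrors the do/while loop above).
static void padWithNops(uint64_t Count, bool HasNopl,
                        std::vector<uint8_t> &Out) {
  // 15 bytes is the longest single long-nop form; CPUs without NOPL (and the
  // "slm" tuning in the patch) are capped at 7 bytes.
  const uint64_t MaxNopLength = HasNopl ? 15 : 7;
  while (Count != 0) {
    const uint64_t Len = std::min(Count, MaxNopLength);
    // The real backend copies Len bytes from the selected table (adding 0x66
    // prefixes past 10 bytes); single-byte nops keep this sketch standalone.
    Out.insert(Out.end(), Len, 0x90);
    Count -= Len;
  }
}
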
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index f0d00b0..9ff85b9 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -41,6 +41,16 @@ namespace X86 {
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
+
+ /// AVX512 static rounding constants. These need to match the values in
+ /// avx512fintrin.h.
+ enum STATIC_ROUNDING {
+ TO_NEAREST_INT = 0,
+ TO_NEG_INF = 1,
+ TO_POS_INF = 2,
+ TO_ZERO = 3,
+ CUR_DIRECTION = 4
+ };
} // end namespace X86;
/// X86II - This namespace holds all of the target specific flags that
@@ -675,7 +685,7 @@ namespace X86II {
case X86II::RawFrmSrc:
case X86II::RawFrmDst:
case X86II::RawFrmDstSrc:
- return -1;
+ return -1;
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem:
@@ -696,23 +706,27 @@ namespace X86II {
// Start from 0, skip registers encoded in VEX_VVVV or a mask register.
return 0 + HasVEX_4V + HasEVEX_K;
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
- case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8:
+ case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5:
+ case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8:
case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB:
+ case X86II::MRM_CC: case X86II::MRM_CD: case X86II::MRM_CE:
case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1:
- case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6:
- case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9:
- case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC:
- case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF:
- case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2:
- case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5:
- case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA:
- case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED:
- case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1:
- case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4:
- case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7:
- case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA:
- case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD:
- case X86II::MRM_FE: case X86II::MRM_FF:
+ case X86II::MRM_D2: case X86II::MRM_D3: case X86II::MRM_D4:
+ case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D7:
+ case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD:
+ case X86II::MRM_DE: case X86II::MRM_DF: case X86II::MRM_E0:
+ case X86II::MRM_E1: case X86II::MRM_E2: case X86II::MRM_E3:
+ case X86II::MRM_E4: case X86II::MRM_E5: case X86II::MRM_E6:
+ case X86II::MRM_E7: case X86II::MRM_E8: case X86II::MRM_E9:
+ case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC:
+ case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_EF:
+ case X86II::MRM_F0: case X86II::MRM_F1: case X86II::MRM_F2:
+ case X86II::MRM_F3: case X86II::MRM_F4: case X86II::MRM_F5:
+ case X86II::MRM_F6: case X86II::MRM_F7: case X86II::MRM_F8:
+ case X86II::MRM_F9: case X86II::MRM_FA: case X86II::MRM_FB:
+ case X86II::MRM_FC: case X86II::MRM_FD: case X86II::MRM_FE:
+ case X86II::MRM_FF:
return -1;
}
}
@@ -740,7 +754,7 @@ namespace X86II {
case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
- return true;
+ return true;
}
return false;
}
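
The STATIC_ROUNDING hunk above only pins down five integer values, so a small illustrative sketch follows showing how such a constant might be rendered as the AVX-512 rounding suffix seen in assembly. The suffix strings ({rn-sae}, {rd-sae}, {ru-sae}, {rz-sae}) and the helper itself are my assumptions about conventional spelling, not something the diff establishes.

#include <cstdio>

namespace X86 {
enum STATIC_ROUNDING { // duplicated from the hunk above for a standalone demo
  TO_NEAREST_INT = 0,
  TO_NEG_INF = 1,
  TO_POS_INF = 2,
  TO_ZERO = 3,
  CUR_DIRECTION = 4
};
} // namespace X86

// Hypothetical helper: render a static-rounding immediate as an asm suffix.
static const char *roundingSuffix(unsigned RC) {
  switch (RC) {
  case X86::TO_NEAREST_INT: return "{rn-sae}";
  case X86::TO_NEG_INF:     return "{rd-sae}";
  case X86::TO_POS_INF:     return "{ru-sae}";
  case X86::TO_ZERO:        return "{rz-sae}";
  case X86::CUR_DIRECTION:  return "";   // defer to the current MXCSR mode
  default:                  return "<invalid rounding control>";
  }
}

int main() {
  std::printf("%s\n", roundingSuffix(X86::TO_ZERO)); // prints {rz-sae}
}
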
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index a33468d..736c39d 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -32,9 +32,11 @@ namespace {
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
- : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine,
- // Only i386 uses Rel instead of RelA.
- /*HasRelocationAddend*/ EMachine != ELF::EM_386) {}
+ : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine,
+ // Only i386 and IAMCU use Rel instead of RelA.
+ /*HasRelocationAddend*/
+ (EMachine != ELF::EM_386) &&
+ (EMachine != ELF::EM_IAMCU)) {}
X86ELFObjectWriter::~X86ELFObjectWriter()
{}
@@ -246,7 +248,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
if (getEMachine() == ELF::EM_X86_64)
return getRelocType64(Modifier, Type, IsPCRel);
- assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type.");
+ assert((getEMachine() == ELF::EM_386 || getEMachine() == ELF::EM_IAMCU) &&
+ "Unsupported ELF machine type.");
return getRelocType32(Modifier, getType32(Type), IsPCRel);
}
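
The constructor change above encodes one small rule worth restating on its own: among X86 ELF targets only classic i386 and the new IAMCU flavour use REL relocations (implicit addend stored in the section contents); every other flavour uses RELA. The EM_* numbers in this standalone sketch are quoted from the ELF headers as I recall them, so treat them as an assumption.

#include <cstdint>

// REL vs. RELA selection, restated outside the MCELFObjectTargetWriter ctor.
static bool usesRelocationAddend(uint16_t EMachine) {
  const uint16_t EM_386 = 3;   // assumed value of llvm::ELF::EM_386
  const uint16_t EM_IAMCU = 6; // assumed value of llvm::ELF::EM_IAMCU
  return EMachine != EM_386 && EMachine != EM_IAMCU;
}
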
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index deaad2a..30d5c80 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -20,39 +20,42 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
- class Triple;
-
- class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
-
- public:
- explicit X86MCAsmInfoDarwin(const Triple &Triple);
- };
-
- struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
- explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
- const MCExpr *
- getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
- MCStreamer &Streamer) const override;
- };
-
- class X86ELFMCAsmInfo : public MCAsmInfoELF {
- void anchor() override;
- public:
- explicit X86ELFMCAsmInfo(const Triple &Triple);
- };
-
- class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
- void anchor() override;
- public:
- explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
- };
-
- class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
- void anchor() override;
- public:
- explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
- };
+class Triple;
+
+class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+
+public:
+ explicit X86MCAsmInfoDarwin(const Triple &Triple);
+};
+
+struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const override;
+};
+
+class X86ELFMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit X86ELFMCAsmInfo(const Triple &Triple);
+};
+
+class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ void anchor() override;
+
+public:
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+};
+
+class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ void anchor() override;
+
+public:
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
+};
} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 10c434c..dfab6ec 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -510,8 +510,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
- Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
+ CurByte, OS, Fixups);
return;
}
@@ -988,6 +988,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const MCInstrDesc &Desc) {
unsigned REX = 0;
+ bool UsesHighByteReg = false;
+
if (TSFlags & X86II::REX_W)
REX |= 1 << 3; // set REX.W
@@ -1004,6 +1006,8 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const MCOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
+ UsesHighByteReg = true;
if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue;
// FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
// that returns non-zero.
@@ -1073,6 +1077,9 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
}
break;
}
+ if (REX && UsesHighByteReg)
+ report_fatal_error("Cannot encode high byte register in REX-prefixed instruction");
+
return REX;
}
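
The DetermineREXPrefix change above turns a silent mis-encoding into a hard error: once a REX prefix is present, the byte-register encodings that previously named AH/CH/DH/BH are reinterpreted as SPL/BPL/SIL/DIL, so one instruction cannot use both. A self-contained restatement of the check, with enum and function names of my own rather than LLVM's:

#include <stdexcept>

enum class GR8 { AL, CL, DL, BL, SPL, BPL, SIL, DIL, AH, CH, DH, BH };

static bool isHighByteReg(GR8 R) {
  return R == GR8::AH || R == GR8::CH || R == GR8::DH || R == GR8::BH;
}

// Mirrors the new report_fatal_error path: a REX prefix and a high-byte
// register are mutually exclusive in 64-bit mode.
static void checkREXEncodable(bool NeedsREX, GR8 R) {
  if (NeedsREX && isHighByteReg(R))
    throw std::runtime_error(
        "cannot encode high byte register in REX-prefixed instruction");
}
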
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 83b4091..53a6550 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -122,7 +122,8 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
} else if (TheTriple.isOSBinFormatELF()) {
// Force the use of an ELF container.
MAI = new X86ELFMCAsmInfo(TheTriple);
- } else if (TheTriple.isWindowsMSVCEnvironment()) {
+ } else if (TheTriple.isWindowsMSVCEnvironment() ||
+ TheTriple.isWindowsCoreCLREnvironment()) {
MAI = new X86MCAsmInfoMicrosoft(TheTriple);
} else if (TheTriple.isOSCygMing() ||
TheTriple.isWindowsItaniumEnvironment()) {
@@ -267,3 +268,184 @@ extern "C" void LLVMInitializeX86TargetMC() {
TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
}
+
+unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size,
+ bool High) {
+ switch (Size) {
+ default: return 0;
+ case 8:
+ if (High) {
+ switch (Reg) {
+ default: return getX86SubSuperRegisterOrZero(Reg, 64);
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case 16:
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case 32:
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case 64:
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+}
+
+unsigned llvm::getX86SubSuperRegister(unsigned Reg, unsigned Size, bool High) {
+ unsigned Res = getX86SubSuperRegisterOrZero(Reg, Size, High);
+ assert(Res != 0 && "Unexpected register or VT");
+ return Res;
+}
+
+
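
A brief usage sketch for the helper defined above (its declaration is added to X86MCTargetDesc.h in a later hunk). It assumes a translation unit that already includes the X86 MCTargetDesc headers and has "using namespace llvm;" in effect, so it is illustrative rather than independently buildable.

// Walking between register widths with the new helper:
unsigned Word = getX86SubSuperRegisterOrZero(X86::RAX, 16);               // X86::AX
unsigned High = getX86SubSuperRegisterOrZero(X86::RAX, 8, /*High=*/true); // X86::AH
unsigned None = getX86SubSuperRegisterOrZero(X86::XMM0, 16);              // 0, not a GPR
// getX86SubSuperRegister() is the asserting variant: same mapping, but it
// aborts instead of returning 0 when the register or size is unexpected.
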
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 6221bab..2d2836f 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -79,7 +79,7 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
/// Takes ownership of \p AB and \p CE.
MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS, MCCodeEmitter *CE,
- bool RelaxAll);
+ bool RelaxAll, bool IncrementalLinkerCompatible);
/// Construct an X86 Mach-O object writer.
MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
@@ -98,6 +98,17 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
+
+/// Returns the sub or super register of a specific X86 register.
+/// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX.
+/// Aborts on error.
+unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false);
+
+/// Returns the sub or super register of a specific X86 register.
+/// Like getX86SubSuperRegister() but returns 0 on error.
+unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned,
+ bool High = false);
+
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 9e801fc..191ebea 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -149,14 +149,19 @@ void X86MachObjectWriter::RecordX86_64Relocation(
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
- Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported relocation of modified symbol", false);
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) {
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation of modified symbol");
+ return;
+ }
// We don't support PCrel relocations of differences. Darwin 'as' doesn't
// implement most of these correctly.
- if (IsPCRel)
- report_fatal_error("unsupported pc-relative relocation of difference",
- false);
+ if (IsPCRel) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported pc-relative relocation of difference");
+ return;
+ }
// The support for the situation where one or both of the symbols would
// require a local relocation is handled just like if the symbols were
@@ -168,16 +173,20 @@ void X86MachObjectWriter::RecordX86_64Relocation(
// Darwin 'as' doesn't emit correct relocations for this (it ends up with a
// single SIGNED relocation); reject it for now. Except the case where both
// symbols don't have a base, equal but both NULL.
- if (A_Base == B_Base && A_Base)
- report_fatal_error("unsupported relocation with identical base", false);
+ if (A_Base == B_Base && A_Base) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported relocation with identical base");
+ return;
+ }
// A subtraction expression where either symbol is undefined is a
// non-relocatable expression.
if (A->isUndefined() || B->isUndefined()) {
StringRef Name = A->isUndefined() ? A->getName() : B->getName();
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
"unsupported relocation with subtraction expression, symbol '" +
Name + "' can not be undefined in a subtraction expression");
+ return;
}
Value += Writer->getSymbolAddress(*A, Layout) -
@@ -244,12 +253,16 @@ void X86MachObjectWriter::RecordX86_64Relocation(
FixedValue = Res;
return;
} else {
- report_fatal_error("unsupported relocation of variable '" +
- Symbol->getName() + "'", false);
+ Asm.getContext().reportError(Fixup.getLoc(),
+ "unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ return;
}
} else {
- report_fatal_error("unsupported relocation of undefined symbol '" +
- Symbol->getName() + "'", false);
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
+ return;
}
MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
@@ -266,8 +279,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
Type = MachO::X86_64_RELOC_TLV;
} else if (Modifier != MCSymbolRefExpr::VK_None) {
- report_fatal_error("unsupported symbol modifier in relocation",
- false);
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported symbol modifier in relocation");
+ return;
} else {
Type = MachO::X86_64_RELOC_SIGNED;
@@ -292,9 +306,12 @@ void X86MachObjectWriter::RecordX86_64Relocation(
}
}
} else {
- if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in branch "
- "relocation", false);
+ if (Modifier != MCSymbolRefExpr::VK_None) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "unsupported symbol modifier in branch relocation");
+ return;
+ }
Type = MachO::X86_64_RELOC_BRANCH;
}
@@ -309,16 +326,22 @@ void X86MachObjectWriter::RecordX86_64Relocation(
Type = MachO::X86_64_RELOC_GOT;
IsPCRel = 1;
} else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- report_fatal_error("TLVP symbol modifier should have been rip-rel",
- false);
- } else if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in relocation", false);
- else {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "TLVP symbol modifier should have been rip-rel");
+ return;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(), "unsupported symbol modifier in relocation");
+ return;
+ } else {
Type = MachO::X86_64_RELOC_UNSIGNED;
unsigned Kind = Fixup.getKind();
- if (Kind == X86::reloc_signed_4byte)
- report_fatal_error("32-bit absolute addressing is not supported in "
- "64-bit mode", false);
+ if (Kind == X86::reloc_signed_4byte) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "32-bit absolute addressing is not supported in 64-bit mode");
+ return;
+ }
}
}
}
@@ -350,10 +373,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- if (!A->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression",
- false);
+ if (!A->getFragment()) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+ return false;
+ }
uint32_t Value = Writer->getSymbolAddress(*A, Layout);
uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
@@ -363,10 +389,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbol *SB = &B->getSymbol();
- if (!SB->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression",
- false);
+ if (!SB->getFragment()) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+ return false;
+ }
// Select the appropriate difference relocation type.
//
@@ -387,12 +416,12 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer,
if (FixupOffset > 0xffffff) {
char Buffer[32];
format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
- Asm.getContext().reportFatalError(Fixup.getLoc(),
+ Asm.getContext().reportError(Fixup.getLoc(),
Twine("Section too large, can't encode "
"r_address (") + Buffer +
") into 24 bits of scattered "
"relocation entry.");
- llvm_unreachable("fatal error returned?!");
+ return false;
}
MachO::any_relocation_info MRE;
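
The recurring edit in this file swaps report_fatal_error() (which aborts the whole compilation) for MCContext::reportError() plus an early return, so a bad relocation becomes a recoverable diagnostic attached to the fixup's source location. A tiny sketch of that shape, using placeholder types of my own rather than the real MC classes:

#include <cstdio>
#include <string>

struct SourceLoc { int Line; };       // stand-in for SMLoc
struct DiagContext {                  // stand-in for MCContext
  bool HadError = false;
  void reportError(SourceLoc L, const std::string &Msg) {
    HadError = true;
    std::fprintf(stderr, "line %d: error: %s\n", L.Line, Msg.c_str());
  }
};

static void recordRelocation(DiagContext &Ctx, SourceLoc Loc, bool Supported) {
  if (!Supported) {
    // Previously: report_fatal_error("unsupported relocation ...");
    Ctx.reportError(Loc, "unsupported relocation of modified symbol");
    return; // keep assembling so later fixups can surface their own errors
  }
  // ... emit the relocation entry ...
}
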
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 92f42b6..d045118 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -50,9 +50,11 @@ void X86WinCOFFStreamer::FinishImpl() {
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
raw_pwrite_stream &OS,
- MCCodeEmitter *CE, bool RelaxAll) {
+ MCCodeEmitter *CE, bool RelaxAll,
+ bool IncrementalLinkerCompatible) {
X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
+ S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
return S;
}
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index cae865a..4fdd527 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -140,13 +140,14 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm,
}
}
-/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
+/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
unsigned NewImm = Imm;
@@ -191,6 +192,16 @@ void DecodePSHUFLWMask(MVT VT, unsigned Imm,
}
}
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumHalfElts = NumElts / 2;
+
+ for (unsigned l = 0; l != NumHalfElts; ++l)
+ ShuffleMask.push_back(l + NumHalfElts);
+ for (unsigned h = 0; h != NumHalfElts; ++h)
+ ShuffleMask.push_back(h);
+}
+
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
@@ -222,7 +233,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
// independently on 128-bit lanes.
unsigned NumLanes = VT.getSizeInBits() / 128;
- if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
@@ -253,6 +264,26 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
}
}
+/// \brief Decode a shuffle packed values at 128-bit granularity
+/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
+/// immediate mask into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits();
+ unsigned ControlBitsMask = NumLanes - 1;
+ unsigned NumControlBits = NumLanes / 2;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+ // We actually need the other source.
+ if (l >= NumLanes / 2)
+ LaneMask += NumLanes;
+ for (unsigned i = 0; i != NumElementsInLane; ++i)
+ ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+ }
+}
+
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned HalfSize = VT.getVectorNumElements() / 2;
@@ -277,10 +308,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
+#ifndef NDEBUG
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
-
- if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
- return;
+ assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
+#endif
// This is a straightforward byte vector.
if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
@@ -290,7 +321,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0; i < NumElements; ++i) {
// For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
// lane of the vector we're inside.
- int Base = i < 16 ? 0 : 16;
+ int Base = i & ~0xf;
Constant *COp = C->getAggregateElement(i);
if (!COp) {
ShuffleMask.clear();
@@ -357,44 +388,66 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
}
}
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+ SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
- assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
- assert(MaskTy->getVectorElementType()->isIntegerTy() &&
- "Expected integer constant mask elements!");
- int ElementBits = MaskTy->getScalarSizeInBits();
- int NumElements = MaskTy->getVectorNumElements();
+  // The mask need not be a vector of the element type we expect, because the
+ // constant pool uniques constants by their bit representation.
+ // e.g. the following take up the same space in the constant pool:
+ // i128 -170141183420855150465331762880109871104
+ //
+ // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
+ //
+ // <4 x i32> <i32 -2147483648, i32 -2147483648,
+ // i32 -2147483648, i32 -2147483648>
+
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+
+ if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+ return;
+
+ // Only support vector types.
+ if (!MaskTy->isVectorTy())
+ return;
+
+  // Make sure it's an integer type.
+ Type *VecEltTy = MaskTy->getVectorElementType();
+ if (!VecEltTy->isIntegerTy())
+ return;
+
+ // Support any element type from byte up to element size.
+  // This is necessary primarily because 64-bit elements get split into 32-bit
+  // ones in the constant pool on 32-bit targets.
+ unsigned EltTySize = VecEltTy->getIntegerBitWidth();
+ if (EltTySize < 8 || EltTySize > ElSize)
+ return;
+
+ unsigned NumElements = MaskTySize / ElSize;
assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
"Unexpected number of vector elements.");
ShuffleMask.reserve(NumElements);
- if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
- assert((unsigned)NumElements == CDS->getNumElements() &&
- "Constant mask has a different number of elements!");
-
- for (int i = 0; i < NumElements; ++i) {
- int Base = (i * ElementBits / 128) * (128 / ElementBits);
- uint64_t Element = CDS->getElementAsInteger(i);
- // Only the least significant 2 bits of the integer are used.
- int Index = Base + (Element & 0x3);
- ShuffleMask.push_back(Index);
- }
- } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
- assert((unsigned)NumElements == C->getNumOperands() &&
- "Constant mask has a different number of elements!");
-
- for (int i = 0; i < NumElements; ++i) {
- int Base = (i * ElementBits / 128) * (128 / ElementBits);
- Constant *COp = CV->getOperand(i);
- if (isa<UndefValue>(COp)) {
- ShuffleMask.push_back(SM_SentinelUndef);
- continue;
- }
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
- // Only the least significant 2 bits of the integer are used.
- int Index = Base + (Element & 0x3);
- ShuffleMask.push_back(Index);
+ unsigned NumElementsPerLane = 128 / ElSize;
+ unsigned Factor = ElSize / EltTySize;
+
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i * Factor);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
}
+ int Index = i & ~(NumElementsPerLane - 1);
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ if (ElSize == 64)
+ Index += (Element >> 1) & 0x1;
+ else
+ Index += Element & 0x3;
+ ShuffleMask.push_back(Index);
}
+
+ // TODO: Handle funny-looking vectors too.
}
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
@@ -503,4 +556,74 @@ void DecodeINSERTQIMask(int Len, int Idx,
ShuffleMask.push_back(SM_SentinelUndef);
}
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ ShuffleMask.push_back((int)M);
+ }
+}
+
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ for (int i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ ShuffleMask.push_back((int)M);
+ }
+}
+
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ if (MaskTy->isVectorTy()) {
+ unsigned NumElements = MaskTy->getVectorNumElements();
+ if (NumElements == VT.getVectorNumElements()) {
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
+ ShuffleMask.clear();
+ return;
+ }
+ if (isa<UndefValue>(COp))
+ ShuffleMask.push_back(SM_SentinelUndef);
+ else {
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ Element &= (1 << NumElements) - 1;
+ ShuffleMask.push_back(Element);
+ }
+ }
+ }
+ return;
+ }
+ // Scalar value; just broadcast it
+ if (!isa<ConstantInt>(C))
+ return;
+ uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
+ int NumElements = VT.getVectorNumElements();
+ Element &= (1 << NumElements) - 1;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask.push_back(Element);
+}
+
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ unsigned NumElements = MaskTy->getVectorNumElements();
+ if (NumElements == VT.getVectorNumElements()) {
+ for (unsigned i = 0; i < NumElements; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ }
+ if (isa<UndefValue>(COp))
+ ShuffleMask.push_back(SM_SentinelUndef);
+ else {
+ uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+ Element &= (1 << NumElements*2) - 1;
+ ShuffleMask.push_back(Element);
+ }
+ }
+ }
+}
} // llvm namespace
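
Because decodeVSHUF64x2FamilyMask above is pure index arithmetic, a worked example is useful. The sketch below is a freestanding copy of that loop, specialised (by assumption) to a 512-bit vector of 64-bit elements, with one concrete immediate checked in main().

#include <cassert>
#include <vector>

static std::vector<int> decodeVSHUF64x2(unsigned Imm) {
  const unsigned NumLanes = 4;                  // 512 / 128
  const unsigned EltsPerLane = 2;               // 128 / 64
  const unsigned ControlBitsMask = NumLanes - 1;
  const unsigned NumControlBits = NumLanes / 2;
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
    if (l >= NumLanes / 2)                      // upper lanes read source #2
      LaneMask += NumLanes;
    for (unsigned i = 0; i != EltsPerLane; ++i)
      Mask.push_back(int(LaneMask * EltsPerLane + i));
  }
  return Mask;
}

int main() {
  // Imm = 0x4E picks lanes 2,3 of the first source and lanes 0,1 of the
  // second, i.e. elements {4,5,6,7, 8,9,10,11}.
  std::vector<int> M = decodeVSHUF64x2(0x4E);
  assert((M == std::vector<int>{4, 5, 6, 7, 8, 9, 10, 11}));
  return 0;
}
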
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 3d10d18..ab18e64 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -54,6 +54,9 @@ void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decodes a PSWAPD 3DNow! instruction.
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
/// the type of the vector allowing it to handle different datatypes and vector
/// widths.
@@ -83,12 +86,18 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a shuffle packed values at 128-bit granularity
+/// immediate mask into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
+
/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
/// No VT provided since it only works on 256-bit, 4 element vectors.
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+ SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a zero extension instruction as a shuffle mask.
void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
@@ -108,6 +117,22 @@ void DecodeEXTRQIMask(int Len, int Idx,
/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
void DecodeINSERTQIMask(int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 8403ae6..fbec662 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -23,56 +23,47 @@ class FunctionPass;
class ImmutablePass;
class X86TargetMachine;
-/// createX86ISelDag - This pass converts a legalized DAG into a
-/// X86-specific DAG, ready for instruction scheduling.
-///
+/// This pass converts a legalized DAG into a X86-specific DAG, ready for
+/// instruction scheduling.
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
-/// createX86GlobalBaseRegPass - This pass initializes a global base
-/// register for PIC on x86-32.
+/// This pass initializes a global base register for PIC on x86-32.
FunctionPass* createX86GlobalBaseRegPass();
-/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
-/// to local-dynamic TLS variables so that the TLS base address for the module
-/// is only fetched once per execution path through the function.
+/// This pass combines multiple accesses to local-dynamic TLS variables so that
+/// the TLS base address for the module is only fetched once per execution path
+/// through the function.
FunctionPass *createCleanupLocalDynamicTLSPass();
-/// createX86FloatingPointStackifierPass - This function returns a pass which
-/// converts floating point register references and pseudo instructions into
-/// floating point stack references and physical instructions.
-///
+/// This function returns a pass which converts floating-point register
+/// references and pseudo instructions into floating-point stack references and
+/// physical instructions.
FunctionPass *createX86FloatingPointStackifierPass();
-/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
-/// before each call to avoid transition penalty between functions encoded with
-/// AVX and SSE.
+/// This pass inserts AVX vzeroupper instructions before each call to avoid
+/// transition penalty between functions encoded with AVX and SSE.
FunctionPass *createX86IssueVZeroUpperPass();
-/// createX86EmitCodeToMemory - Returns a pass that converts a register
-/// allocated function into raw machine code in a dynamically
-/// allocated chunk of memory.
-///
-FunctionPass *createEmitX86CodeToMemory();
-
-/// createX86PadShortFunctions - Return a pass that pads short functions
-/// with NOOPs. This will prevent a stall when returning on the Atom.
+/// Return a pass that pads short functions with NOOPs.
+/// This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
-/// createX86FixupLEAs - Return a a pass that selectively replaces
-/// certain instructions (like add, sub, inc, dec, some shifts,
-/// and some multiplies) by equivalent LEA instructions, in order
-/// to eliminate execution delays in some Atom processors.
+
+/// Return a pass that selectively replaces certain instructions (like add,
+/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
+/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
-/// createX86CallFrameOptimization - Return a pass that optimizes
-/// the code-size of x86 call sequences. This is done by replacing
-/// esp-relative movs with pushes.
+/// Return a pass that removes redundant address recalculations.
+FunctionPass *createX86OptimizeLEAs();
+
+/// Return a pass that optimizes the code-size of x86 call sequences. This is
+/// done by replacing esp-relative movs with pushes.
FunctionPass *createX86CallFrameOptimization();
-/// createX86WinEHStatePass - Return an IR pass that inserts EH registration
-/// stack objects and explicit EH state updates. This pass must run after EH
-/// preparation, which does Windows-specific but architecture-neutral
-/// preparation.
+/// Return an IR pass that inserts EH registration stack objects and explicit
+/// EH state updates. This pass must run after EH preparation, which does
+/// Windows-specific but architecture-neutral preparation.
FunctionPass *createX86WinEHStatePass();
/// Return a Machine IR pass that expands X86-specific pseudo
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 8522674..8902a85 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -37,14 +37,26 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
+def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
+ "Support fxsave/fxrestore instructions">;
+
+def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
+ "Support xsave instructions">;
+
+def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
+ "Support xsaveopt instructions">;
+
+def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
+ "Support xsavec instructions">;
+
+def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
+ "Support xsaves instructions">;
-def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
- "Enable MMX instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
// SSE1+ processors support them.
- [FeatureMMX, FeatureCMOV]>;
+ [FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -60,6 +72,11 @@ def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41]>;
+// The MMX subtarget feature is separate from the rest of the SSE features
+// because it's important (for odd compatibility reasons) to be able to
+// turn it off explicitly while allowing SSE+ to be on.
+def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX",
+ "Enable MMX instructions">;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
@@ -79,16 +96,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
-// FIXME: This is a 16-byte (SSE/AVX) feature; we should rename it to make that
-// explicit. Also, it seems this would be the default state for most chips
-// going forward, so it would probably be better to negate the logic and
-// match the 32-byte "slow mem" feature below.
-def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
- "IsUAMemFast", "true",
- "Fast unaligned memory access">;
+// FIXME: This should not apply to CPUs that do not have SSE.
+def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+ "IsUAMem16Slow", "true",
+ "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
- "IsUAMem32Slow", "true",
- "Slow unaligned 32-byte memory access">;
+ "IsUAMem32Slow", "true",
+ "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
@@ -120,6 +134,8 @@ def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true",
def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
+def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
+ "Enable protection keys">;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@@ -168,9 +184,11 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
+def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
+ "Support LAHF and SAHF instructions">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
-def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
"HasSlowDivide32", "true",
@@ -181,6 +199,11 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
+// TODO: This feature ought to be renamed.
+// What it really refers to are CPUs for which certain instructions
+// (which ones besides the example below?) are microcoded.
+// The best examples of this are the memory forms of CALL and PUSH
+// instructions, which should be avoided in favor of a MOV + register CALL/PUSH.
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"CallRegIndirect", "true",
"Call register indirect">;
@@ -208,278 +231,473 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM",
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-def : Proc<"generic", []>;
-def : Proc<"i386", []>;
-def : Proc<"i486", []>;
-def : Proc<"i586", []>;
-def : Proc<"pentium", []>;
-def : Proc<"pentium-mmx", [FeatureMMX]>;
-def : Proc<"i686", []>;
-def : Proc<"pentiumpro", [FeatureCMOV]>;
-def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>;
-def : Proc<"pentium3", [FeatureSSE1]>;
-def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
-def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"pentium4", [FeatureSSE2]>;
-def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"generic", [FeatureSlowUAMem16]>;
+def : Proc<"i386", [FeatureSlowUAMem16]>;
+def : Proc<"i486", [FeatureSlowUAMem16]>;
+def : Proc<"i586", [FeatureSlowUAMem16]>;
+def : Proc<"pentium", [FeatureSlowUAMem16]>;
+def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"i686", [FeatureSlowUAMem16]>;
+def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV,
+ FeatureFXSR]>;
+def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
+ FeatureFXSR]>;
+def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
+ FeatureFXSR, FeatureSlowBTMem]>;
+def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
+ FeatureFXSR]>;
+def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureSlowBTMem]>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
- [FeatureSSE3, FeatureSlowBTMem]>;
+ [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR,
+ FeatureSlowBTMem]>;
// NetBurst.
-def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : Proc<"prescott",
+ [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR,
+ FeatureSlowBTMem]>;
+def : Proc<"nocona", [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSE3,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem
+]>;
// Intel Core 2 Solo/Duo.
-def : ProcessorModel<"core2", SandyBridgeModel,
- [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
-def : ProcessorModel<"penryn", SandyBridgeModel,
- [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>;
+def : ProcessorModel<"core2", SandyBridgeModel, [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
+]>;
+def : ProcessorModel<"penryn", SandyBridgeModel, [
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSE41,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
+]>;
// Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
- ProcIntelAtom,
- FeatureSSSE3,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeatureSlowBTMem,
- FeatureLeaForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureCallRegIndirect,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions
- ]>;
+ ProcIntelAtom,
+ FeatureSlowUAMem16,
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeatureSlowBTMem,
+ FeatureLEAForSP,
+ FeatureSlowDivide32,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeatureLEAUsesAG,
+ FeaturePadShortFunctions,
+ FeatureLAHFSAHF
+]>;
def : BonnellProc<"bonnell">;
def : BonnellProc<"atom">; // Pin the generic name to the baseline.
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
- ProcIntelSLM,
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureMOVBE,
- FeaturePOPCNT,
- FeaturePCLMUL,
- FeatureAES,
- FeatureSlowDivide64,
- FeatureCallRegIndirect,
- FeaturePRFCHW,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureSlowBTMem,
- FeatureFastUAMem
- ]>;
+ ProcIntelSLM,
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureMOVBE,
+ FeaturePOPCNT,
+ FeaturePCLMUL,
+ FeatureAES,
+ FeatureSlowDivide64,
+ FeatureCallRegIndirect,
+ FeaturePRFCHW,
+ FeatureSlowLEA,
+ FeatureSlowIncDec,
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
+]>;
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureFastUAMem,
- FeaturePOPCNT
- ]>;
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureLAHFSAHF
+]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureFastUAMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL
- ]>;
+ FeatureMMX,
+ FeatureSSE42,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureLAHFSAHF
+]>;
def : WestmereProc<"westmere">;
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureAVX,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeatureSlowUAMem32,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL
- ]>;
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureSlowUAMem32,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureLAHFSAHF
+]>;
def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
- FeatureAVX,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeatureSlowUAMem32,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase
- ]>;
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureSlowUAMem32,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
+]>;
def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureAVX2,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureSlowIncDec
- ]>;
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureRDRAND,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureSlowIncDec,
+ FeatureLAHFSAHF
+]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- FeatureAVX2,
- FeatureCMPXCHG16B,
- FeatureFastUAMem,
- FeaturePOPCNT,
- FeatureAES,
- FeaturePCLMUL,
- FeatureRDRAND,
- FeatureF16C,
- FeatureFSGSBase,
- FeatureMOVBE,
- FeatureLZCNT,
- FeatureBMI,
- FeatureBMI2,
- FeatureFMA,
- FeatureRTM,
- FeatureHLE,
- FeatureADX,
- FeatureRDSEED,
- FeatureSlowIncDec
- ]>;
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureSlowIncDec,
+ FeatureLAHFSAHF
+]>;
def : BroadwellProc<"broadwell">;
// FIXME: define KNL model
-class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
- [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
- FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
- FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureMPX]>;
+class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureMMX,
+ FeatureAVX512,
+ FeatureFXSR,
+ FeatureERI,
+ FeatureCDI,
+ FeaturePFI,
+ FeatureCMPXCHG16B,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureSlowIncDec,
+ FeatureMPX,
+ FeatureLAHFSAHF
+]>;
def : KnightsLandingProc<"knl">;
// FIXME: define SKX model
-class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
- [FeatureAVX512, FeatureCDI,
- FeatureDQI, FeatureBWI, FeatureVLX,
- FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
- FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
- FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
- FeatureSlowIncDec, FeatureMPX]>;
+class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ FeatureMMX,
+ FeatureAVX512,
+ FeatureFXSR,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureBWI,
+ FeatureVLX,
+ FeaturePKU,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeaturePOPCNT,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureRDRAND,
+ FeatureF16C,
+ FeatureFSGSBase,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureFMA,
+ FeatureRTM,
+ FeatureHLE,
+ FeatureADX,
+ FeatureRDSEED,
+ FeatureSlowIncDec,
+ FeatureMPX,
+ FeatureXSAVEC,
+ FeatureXSAVES,
+ FeatureLAHFSAHF
+]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
// AMD CPUs.
-def : Proc<"k6", [FeatureMMX]>;
-def : Proc<"k6-2", [Feature3DNow]>;
-def : Proc<"k6-3", [Feature3DNow]>;
-def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA,
+ FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA,
+ FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
+ FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
+ FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
+ FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>;
+def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
+ FeatureFXSR, Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"amdfam10", [FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem,
+def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"barcelona", [FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem,
+def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
+ FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
+def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
+ FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
+ FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;
+def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
+ FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
+ FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;
+
// Bobcat
-def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD]>;
+def : Proc<"btver1", [
+ FeatureMMX,
+ FeatureSSSE3,
+ FeatureSSE4A,
+ FeatureFXSR,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Jaguar
-def : ProcessorModel<"btver2", BtVer2Model,
- [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
- FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
- FeatureBMI, FeatureF16C, FeatureMOVBE,
- FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem,
- FeatureSlowSHLD]>;
-
-// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
+def : ProcessorModel<"btver2", BtVer2Model, [
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureCMPXCHG16B,
+ FeaturePRFCHW,
+ FeatureAES,
+ FeaturePCLMUL,
+ FeatureBMI,
+ FeatureF16C,
+ FeatureMOVBE,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureXSAVEOPT,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Bulldozer
-def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowSHLD]>;
+def : Proc<"bdver1", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Piledriver
-def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureF16C,
- FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
+def : Proc<"bdver2", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureBMI,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
+]>;
// Steamroller
-def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
- FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
- FeatureAVX, FeatureSSE4A, FeatureF16C,
- FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD,
- FeatureFSGSBase]>;
+def : Proc<"bdver3", [
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureMMX,
+ FeatureAVX,
+ FeatureFXSR,
+ FeatureSSE4A,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureBMI,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureXSAVEOPT,
+ FeatureSlowSHLD,
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
+]>;
// Excavator
-def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
- FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
- FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
- FeaturePOPCNT, FeatureBMI, FeatureBMI2,
- FeatureTBM, FeatureFMA, FeatureSSE4A,
- FeatureFSGSBase]>;
-
-def : Proc<"geode", [Feature3DNowA]>;
-
-def : Proc<"winchip-c6", [FeatureMMX]>;
-def : Proc<"winchip2", [Feature3DNow]>;
-def : Proc<"c3", [Feature3DNow]>;
-def : Proc<"c3-2", [FeatureSSE1]>;
+def : Proc<"bdver4", [
+ FeatureMMX,
+ FeatureAVX2,
+ FeatureFXSR,
+ FeatureXOP,
+ FeatureFMA4,
+ FeatureCMPXCHG16B,
+ FeatureAES,
+ FeaturePRFCHW,
+ FeaturePCLMUL,
+ FeatureF16C,
+ FeatureLZCNT,
+ FeaturePOPCNT,
+ FeatureXSAVE,
+ FeatureBMI,
+ FeatureBMI2,
+ FeatureTBM,
+ FeatureFMA,
+ FeatureXSAVEOPT,
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
+]>;
+
+def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
+
+def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -492,8 +710,8 @@ def : Proc<"c3-2", [FeatureSSE1]>;
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel,
- [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem]>;
+ [FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit,
+ FeatureSlowBTMem ]>;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -520,10 +738,6 @@ include "X86CallingConv.td"
// Assembly Parser
//===----------------------------------------------------------------------===//
-def ATTAsmParser : AsmParser {
- string AsmParserClassName = "AsmParser";
-}
-
def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
@@ -568,7 +782,6 @@ def IntelAsmWriter : AsmWriter {
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
- let AssemblyParsers = [ATTAsmParser];
let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
}
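
The per-CPU feature lists above are only tables; what the rest of the backend sees are the subtarget predicates generated from them (FeatureSlowIncDec, for instance, is what the X86FixupLEAs change later in this patch queries as slowIncDec()). A rough standalone sketch of that pattern, with illustrative names rather than LLVM's generated Subtarget classes:

#include <cstdint>
#include <iostream>

// Minimal sketch: a CPU's feature list becomes a bit set, and each bit is
// exposed as a predicate that optimization passes can query.
enum Feature : uint64_t {
  FeatureSlowIncDec = 1ull << 0,
  FeatureLAHFSAHF   = 1ull << 1,
};

struct SubtargetSketch {
  uint64_t Bits;
  bool slowIncDec() const { return Bits & FeatureSlowIncDec; }
  bool hasLAHFSAHF() const { return Bits & FeatureLAHFSAHF; }
};

int main() {
  // "silvermont" enables both of these bits in the table above.
  SubtargetSketch Silvermont{FeatureSlowIncDec | FeatureLAHFSAHF};
  std::cout << "avoid INC/DEC on this CPU: " << Silvermont.slowIncDec() << '\n';
}
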
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index ba33248..2170e62 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -217,10 +217,10 @@ static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
if (AsmVariant == 0) O << '%';
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
- MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ?
- MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
- ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
- Reg = getX86SubSuperRegister(Reg, VT);
+ unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 :
+ (strcmp(Modifier+6,"32") == 0) ? 32 :
+ (strcmp(Modifier+6,"16") == 0) ? 16 : 8;
+ Reg = getX86SubSuperRegister(Reg, Size);
}
O << X86ATTInstPrinter::getRegisterName(Reg);
return;
@@ -361,22 +361,21 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
switch (Mode) {
default: return true; // Unknown mode.
case 'b': // Print QImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i8);
+ Reg = getX86SubSuperRegister(Reg, 8);
break;
case 'h': // Print QImode high register
- Reg = getX86SubSuperRegister(Reg, MVT::i8, true);
+ Reg = getX86SubSuperRegister(Reg, 8, true);
break;
case 'w': // Print HImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i16);
+ Reg = getX86SubSuperRegister(Reg, 16);
break;
case 'k': // Print SImode register
- Reg = getX86SubSuperRegister(Reg, MVT::i32);
+ Reg = getX86SubSuperRegister(Reg, 32);
break;
case 'q':
// Print 64-bit register names if 64-bit integer registers are available.
// Otherwise, print 32-bit register names.
- MVT::SimpleValueType Ty = P.getSubtarget().is64Bit() ? MVT::i64 : MVT::i32;
- Reg = getX86SubSuperRegister(Reg, Ty);
+ Reg = getX86SubSuperRegister(Reg, P.getSubtarget().is64Bit() ? 64 : 32);
break;
}
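
For reference, the 'b'/'h'/'w'/'k'/'q' modes handled here correspond to the operand-size modifiers used in GCC/Clang-style inline assembly, now resolved through the size-based getX86SubSuperRegister. A small x86-only illustration, compiler-specific and shown as a sketch rather than part of this patch:

#include <cstdio>

int main() {
  unsigned value = 0x11223344;
  unsigned char low;
  // %b1 asks for the 8-bit subregister of operand 1 (e.g. %cl); plain %0 is
  // printed at the width of 'low'. This is the mapping printAsmMRegister
  // implements via getX86SubSuperRegister.
  asm("movb %b1, %0" : "=r"(low) : "q"(value));
  std::printf("low byte: %#x\n", (unsigned)low);
  return 0;
}
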
@@ -535,6 +534,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
S, MCConstantExpr::create(int64_t(1), MMI->getContext()));
}
}
+ OutStreamer->EmitSyntaxDirective();
}
static void
@@ -565,10 +565,11 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const {
const MachineConstantPoolEntry &CPE =
MF->getConstantPool()->getConstants()[CPID];
if (!CPE.isMachineConstantPoolEntry()) {
- SectionKind Kind = CPE.getSectionKind(TM.getDataLayout());
+ const DataLayout &DL = MF->getDataLayout();
+ SectionKind Kind = CPE.getSectionKind(&DL);
const Constant *C = CPE.Val.ConstVal;
if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>(
- getObjFileLowering().getSectionForConstant(Kind, C))) {
+ getObjFileLowering().getSectionForConstant(DL, Kind, C))) {
if (MCSymbol *Sym = S->getCOMDATSymbol()) {
if (Sym->isUndefined())
OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global);
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
index 7f5d127..9c8bd98 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -78,8 +78,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// outputting it to the OutStream. This allows the shadow tracker to minimise
// the number of NOPs used for stackmap padding.
void EmitAndCountInstruction(MCInst &Inst);
-
- void InsertStackMapShadows(MachineFunction &MF);
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index 031ba4b..fc6ee17 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
@@ -53,10 +54,13 @@ private:
// Information we know about a particular call site
struct CallContext {
CallContext()
- : Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
- MovVector(4, nullptr), NoStackParams(false), UsePush(false){};
+ : FrameSetup(nullptr), Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
+ MovVector(4, nullptr), NoStackParams(false), UsePush(false){}
- // Actuall call instruction
+ // Iterator referring to the frame setup instruction
+ MachineBasicBlock::iterator FrameSetup;
+
+ // Actual call instruction
MachineInstr *Call;
// A copy of the stack pointer
@@ -75,17 +79,16 @@ private:
bool UsePush;
};
- typedef DenseMap<MachineInstr *, CallContext> ContextMap;
+ typedef SmallVector<CallContext, 8> ContextVector;
bool isLegal(MachineFunction &MF);
- bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap);
+ bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap);
void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, CallContext &Context);
- bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock::iterator I,
- const CallContext &Context);
+ bool adjustCallSequence(MachineFunction &MF, const CallContext &Context);
MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup,
unsigned Reg);
@@ -100,7 +103,8 @@ private:
const char *getPassName() const override { return "X86 Optimize Call Frame"; }
const TargetInstrInfo *TII;
- const TargetFrameLowering *TFL;
+ const X86FrameLowering *TFL;
+ const X86Subtarget *STI;
const MachineRegisterInfo *MRI;
static char ID;
};
@@ -124,8 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// No point in running this in 64-bit mode, since some arguments are
// passed in-register in all common calling conventions, so the pattern
// we're looking for will never match.
- const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
- if (STI.is64Bit())
+ if (STI->is64Bit())
+ return false;
+
+ // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset
+ // in the compact unwind encoding that Darwin uses. So, bail if there
+ // is a danger of that being generated.
+ if (STI->isTargetDarwin() &&
+ (!MF.getMMI().getLandingPads().empty() ||
+ (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
return false;
// You would expect straight-line code between call-frame setup and
@@ -161,7 +172,7 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// Check whether this transformation is profitable for a particular
// function - in terms of code size.
bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
- ContextMap &CallSeqMap) {
+ ContextVector &CallSeqVector) {
// This transformation is always a win when we do not expect to have
// a reserved call frame. Under other circumstances, it may be either
// a win or a loss, and requires a heuristic.
@@ -170,24 +181,20 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
return true;
// Don't do this when not optimizing for size.
- bool OptForSize =
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction()->hasFnAttribute(Attribute::MinSize);
-
- if (!OptForSize)
+ if (!MF.getFunction()->optForSize())
return false;
unsigned StackAlign = TFL->getStackAlignment();
int64_t Advantage = 0;
- for (auto CC : CallSeqMap) {
+ for (auto CC : CallSeqVector) {
// Call sites where no parameters are passed on the stack
// do not affect the cost, since there needs to be no
// stack adjustment.
- if (CC.second.NoStackParams)
+ if (CC.NoStackParams)
continue;
- if (!CC.second.UsePush) {
+ if (!CC.UsePush) {
// If we don't use pushes for a particular call site,
// we pay for not having a reserved call frame with an
// additional sub/add esp pair. The cost is ~3 bytes per instruction,
@@ -200,11 +207,11 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
// We'll need a add after the call.
Advantage -= 3;
// If we have to realign the stack, we'll also need a sub before
- if (CC.second.ExpectedDist % StackAlign)
+ if (CC.ExpectedDist % StackAlign)
Advantage -= 3;
// Now, for each push, we save ~3 bytes. For small constants, we actually
// save more (up to 5 bytes), but 3 should be a good approximation.
- Advantage += (CC.second.ExpectedDist / 4) * 3;
+ Advantage += (CC.ExpectedDist / 4) * 3;
}
}
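
To make the byte-count heuristic concrete, here is a standalone restatement of the push branch above (simplified: the real pass also handles call sites that cannot use pushes and accumulates the advantage across all call sites):

#include <iostream>

// One ADD after the call costs ~3 bytes, an extra SUB for realignment costs
// ~3 more, and each 4-byte argument turned into a push saves ~3 bytes.
static int pushAdvantage(int ExpectedDist, int StackAlign) {
  int Advantage = -3;                  // add esp, N after the call
  if (ExpectedDist % StackAlign)
    Advantage -= 3;                    // sub esp, M to realign beforehand
  Advantage += (ExpectedDist / 4) * 3; // ~3 bytes saved per push
  return Advantage;
}

int main() {
  std::cout << pushAdvantage(16, 16) << '\n'; // four args, aligned:    +9
  std::cout << pushAdvantage(12, 16) << '\n'; // three args, realigned: +3
  std::cout << pushAdvantage(4, 16) << '\n';  // one arg, realigned:    -3
}
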
@@ -212,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
}
bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
- TII = MF.getSubtarget().getInstrInfo();
- TFL = MF.getSubtarget().getFrameLowering();
+ STI = &MF.getSubtarget<X86Subtarget>();
+ TII = STI->getInstrInfo();
+ TFL = STI->getFrameLowering();
MRI = &MF.getRegInfo();
if (!isLegal(MF))
@@ -223,21 +231,22 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
- ContextMap CallSeqMap;
+ ContextVector CallSeqVector;
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
if (I->getOpcode() == FrameSetupOpcode) {
- CallContext &Context = CallSeqMap[I];
+ CallContext Context;
collectCallInfo(MF, *BB, I, Context);
+ CallSeqVector.push_back(Context);
}
- if (!isProfitable(MF, CallSeqMap))
+ if (!isProfitable(MF, CallSeqVector))
return false;
- for (auto CC : CallSeqMap)
- if (CC.second.UsePush)
- Changed |= adjustCallSequence(MF, CC.first, CC.second);
+ for (auto CC : CallSeqVector)
+ if (CC.UsePush)
+ Changed |= adjustCallSequence(MF, CC);
return Changed;
}
@@ -307,13 +316,13 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// Check that this particular call sequence is amenable to the
// transformation.
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
- MF.getSubtarget().getRegisterInfo());
- unsigned StackPtr = RegInfo.getStackRegister();
+ STI->getRegisterInfo());
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
// We expect to enter this at the beginning of a call sequence
assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
MachineBasicBlock::iterator FrameSetup = I++;
+ Context.FrameSetup = FrameSetup;
// How much do we adjust the stack? This puts an upper bound on
// the number of parameters actually passed on it.
@@ -338,7 +347,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
if (!I->isCopy() || !I->getOperand(0).isReg())
return;
Context.SPCopy = I++;
- StackPtr = Context.SPCopy->getOperand(0).getReg();
+
+ unsigned StackPtr = Context.SPCopy->getOperand(0).getReg();
// Scan the call setup sequence for the pattern we're looking for.
// We only handle a simple case - a sequence of MOV32mi or MOV32mr
@@ -434,22 +444,22 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
}
bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
- MachineBasicBlock::iterator I,
const CallContext &Context) {
// Ok, we can in fact do the transformation for this call.
// Do not remove the FrameSetup instruction, but adjust the parameters.
// PEI will end up finalizing the handling of this.
- MachineBasicBlock::iterator FrameSetup = I;
- MachineBasicBlock &MBB = *(I->getParent());
+ MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
+ MachineBasicBlock &MBB = *(FrameSetup->getParent());
FrameSetup->getOperand(1).setImm(Context.ExpectedDist);
- DebugLoc DL = I->getDebugLoc();
+ DebugLoc DL = FrameSetup->getDebugLoc();
// Now, iterate through the vector in reverse order, and replace the movs
// with pushes. MOVmi/MOVmr doesn't have any defs, so no need to
// replace uses.
for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) {
MachineBasicBlock::iterator MOV = *Context.MovVector[Idx];
MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
+ MachineBasicBlock::iterator Push = nullptr;
if (MOV->getOpcode() == X86::MOV32mi) {
unsigned PushOpcode = X86::PUSHi32;
// If the operand is a small (8-bit) immediate, we can use a
@@ -461,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
if (isInt<8>(Val))
PushOpcode = X86::PUSH32i8;
}
- BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp);
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
+ .addOperand(PushOp);
} else {
unsigned int Reg = PushOp.getReg();
// If PUSHrmm is not slow on this target, try to fold the source of the
// push into the instruction.
- const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- bool SlowPUSHrmm = ST.isAtom() || ST.isSLM();
+ bool SlowPUSHrmm = STI->isAtom() || STI->isSLM();
// Check that this is legal to fold. Right now, we're extremely
// conservative about that.
MachineInstr *DefMov = nullptr;
if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) {
- MachineInstr *Push =
- BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm));
unsigned NumOps = DefMov->getDesc().getNumOperands();
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -483,12 +492,19 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
DefMov->eraseFromParent();
} else {
- BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r))
.addReg(Reg)
.getInstr();
}
}
+ // For debugging, when using SP-based CFA, we need to adjust the CFA
+ // offset after each push.
+ // TODO: This is needed only if we require precise CFA.
+ if (!TFL->hasFP(MF))
+ TFL->BuildCFI(MBB, std::next(Push), DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
+
MBB.erase(MOV);
}
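
On 32-bit x86 the net effect of this rewrite is to replace the reserved-call-frame stores with pushes emitted in reverse argument order, plus a CFA-offset adjustment after each push when there is no frame pointer. A minimal source-level shape it targets; the assembly in the comments is indicative, not an exact listing:

// Before: arguments stored through %esp into a reserved call frame, e.g.
//     movl $1, (%esp)
//     movl $2, 4(%esp)
//     movl $3, 8(%esp)
//     movl $4, 12(%esp)
//     calll callee
// After the transformation, when it is legal and judged profitable:
//     pushl $4
//     pushl $3
//     pushl $2
//     pushl $1
//     calll callee
extern "C" void callee(int, int, int, int);

void caller() {
  callee(1, 2, 3, 4);
}
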
@@ -532,13 +548,10 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
DefMI->getParent() != FrameSetup->getParent())
return nullptr;
- // Now, make sure everything else up until the ADJCALLSTACK is a sequence
- // of MOVs. To be less conservative would require duplicating a lot of the
- // logic from PeepholeOptimizer.
- // FIXME: A possibly better approach would be to teach the PeepholeOptimizer
- // to be smarter about folding into pushes.
+ // Make sure we don't have any instructions between DefMI and the
+ // push that make folding the load illegal.
for (auto I = DefMI; I != FrameSetup; ++I)
- if (I->getOpcode() != X86::MOV32rm)
+ if (I->isLoadFoldBarrier())
return nullptr;
return DefMI;
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.h b/contrib/llvm/lib/Target/X86/X86CallingConv.h
index 0eb2494..a08160f 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.h
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
#define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/IR/CallingConv.h"
@@ -42,6 +43,64 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
+inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure
+ // not to split i64 and double between a register and stack
+ static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX};
+ static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]);
+
+ SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
+
+ // If this is the first part of a double/i64/i128, or if we're already
+ // in the middle of a split, add to the pending list. If this is not
+ // the end of the split, return, otherwise go on to process the pending
+ // list
+ if (ArgFlags.isSplit() || !PendingMembers.empty()) {
+ PendingMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+ if (!ArgFlags.isSplitEnd())
+ return true;
+ }
+
+ // If there are no pending members, we are not in the middle of a split,
+ // so do the usual inreg stuff.
+ if (PendingMembers.empty()) {
+ if (unsigned Reg = State.AllocateReg(RegList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+ return false;
+ }
+
+ assert(ArgFlags.isSplitEnd());
+
+ // We now have the entire original argument in PendingMembers, so decide
+ // whether to use registers or the stack.
+ // Per the MCU ABI:
+ // a) To use registers, we need to have enough of them free to contain
+ // the entire argument.
+ // b) We never want to use more than 2 registers for a single argument.
+
+ unsigned FirstFree = State.getFirstUnallocated(RegList);
+ bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree);
+
+ for (auto &It : PendingMembers) {
+ if (UseRegs)
+ It.convertToReg(State.AllocateReg(RegList[FirstFree++]));
+ else
+ It.convertToMem(State.AllocateStack(4, 4));
+ State.addLoc(It);
+ }
+
+ PendingMembers.clear();
+
+ return true;
+}
+
} // End llvm namespace
#endif
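
A standalone restatement of the rule CC_X86_32_MCUInReg applies to split arguments: use registers only when the whole original argument still fits in what is free, never more than two registers per argument, and otherwise send every piece to the stack. The names below are illustrative, not LLVM API:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// 'Pieces' is how many 32-bit parts the original argument was split into;
// 'FirstFree' is how many of EAX/EDX/ECX are already taken.
static std::vector<std::string> assignSplit(unsigned Pieces, unsigned FirstFree) {
  static const char *const RegList[] = {"EAX", "EDX", "ECX"};
  const unsigned NumRegs = 3;
  bool UseRegs = Pieces <= std::min(2u, NumRegs - FirstFree);
  std::vector<std::string> Locs;
  for (unsigned i = 0; i != Pieces; ++i)
    Locs.push_back(UseRegs ? RegList[FirstFree + i] : "stack slot");
  return Locs;
}

int main() {
  // An i64 split into two i32 pieces with all registers free: EAX, EDX.
  for (const auto &L : assignSplit(2, 0)) std::cout << L << ' ';
  std::cout << '\n';
  // The same i64 when only ECX is left: both pieces go to the stack.
  for (const auto &L : assignSplit(2, 2)) std::cout << L << ' ';
  std::cout << '\n';
}
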
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index 8f88888..54d88cb 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -158,6 +158,7 @@ def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>,
// MMX vector types are always returned in XMM0.
CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
@@ -202,6 +203,16 @@ def RetCC_X86_64_AnyReg : CallingConv<[
CCCustom<"CC_X86_AnyReg_Error">
]>;
+// X86-64 HHVM return-value convention.
+def RetCC_X86_64_HHVM: CallingConv<[
+ // Promote all types to i64
+ CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+ // Return: could return in any GP register save RSP and R12.
+ CCIfType<[i64], CCAssignToReg<[RBX, RBP, RDI, RSI, RDX, RCX, R8, R9,
+ RAX, R10, R11, R13, R14, R15]>>
+]>;
+
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
@@ -227,6 +238,9 @@ def RetCC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
+ // Handle HHVM calls.
+ CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
+
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -280,7 +294,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
@@ -305,7 +319,7 @@ def CC_X86_64_C : CallingConv<[
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
- CCIfType<[f80], CCAssignToStack<0, 0>>,
+ CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
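
With the f128 additions here and in RetCC_X86_64_C above, a 128-bit IEEE float is passed and returned in XMM registers and otherwise gets an f80-style stack slot. At the source level the type is typically spelled __float128 with compilers that support it on x86-64; a minimal, illustrative-only example:

// Illustrative only: with a front end that maps __float128 to the IR f128
// type on x86-64, 'x' arrives in %xmm0 and the result is returned in %xmm0
// under the SysV convention handled by CC_X86_64_C / RetCC_X86_64_C.
__float128 twice(__float128 x) {
  return x + x;
}
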
@@ -319,6 +333,23 @@ def CC_X86_64_C : CallingConv<[
CCAssignToStack<64, 64>>
]>;
+// Calling convention for X86-64 HHVM.
+def CC_X86_64_HHVM : CallingConv<[
+ // Use all/any GP registers for args, except RSP.
+ CCIfType<[i64], CCAssignToReg<[RBX, R12, RBP, R15,
+ RDI, RSI, RDX, RCX, R8, R9,
+ RAX, R10, R11, R13, R14]>>
+]>;
+
+// Calling convention for helper functions in HHVM.
+def CC_X86_64_HHVM_C : CallingConv<[
+ // Pass the first argument in RBP.
+ CCIfType<[i64], CCAssignToReg<[RBP]>>,
+
+ // Otherwise it's the same as the regular C calling convention.
+ CCDelegateTo<CC_X86_64_C>
+]>;
+
// Calling convention used on Win64
def CC_X86_Win64_C : CallingConv<[
// FIXME: Handle byval stuff.
@@ -561,6 +592,23 @@ def CC_X86_32_C : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_MCU : CallingConv<[
+ // Handles byval parameters. Note that, like FastCC, we can't rely on
+ // the delegation to CC_X86_32_Common because that happens after code that
+ // puts arguments in registers.
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // If the call is not a vararg call, some arguments may be passed
+ // in integer registers.
+ CCIfNotVarArg<CCIfType<[i32], CCCustom<"CC_X86_32_MCUInReg">>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
def CC_X86_32_FastCall : CallingConv<[
// Promote i1/i8/i16 arguments to i32.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -708,18 +756,28 @@ def CC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<CC_X86_32_C>
]>;
+def CC_X86_32_Intr : CallingConv<[
+ CCAssignToStack<4, 4>
+]>;
+
+def CC_X86_64_Intr : CallingConv<[
+ CCAssignToStack<8, 8>
+]>;
+
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
// This is the root argument convention for the X86-32 backend.
def CC_X86_32 : CallingConv<[
+ CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -734,6 +792,9 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
+ CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
+ CCIfCC<"CallingConv::HHVM_C", CCDelegateTo<CC_X86_64_HHVM_C>>,
+ CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
@@ -764,6 +825,12 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
(sequence "XMM%u", 6, 15))>;
+// The function used by Darwin to obtain the address of a thread-local variable
+// uses rdi to pass a single parameter and rax for the return value. All other
+// GPRs are preserved.
+def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
+ R8, R9, R10, R11)>;
+
// All GPRs - except r11
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
R8, R9, R10, RSP)>;
@@ -778,6 +845,11 @@ def CSR_64_MostRegs : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10,
R11, R12, R13, R14, R15, RBP,
(sequence "XMM%u", 0, 15))>;
+def CSR_32_AllRegs : CalleeSavedRegs<(add EAX, EBX, ECX, EDX, EBP, ESI,
+ EDI, ESP)>;
+def CSR_32_AllRegs_SSE : CalleeSavedRegs<(add CSR_32_AllRegs,
+ (sequence "XMM%u", 0, 7))>;
+
def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX, RSP,
(sequence "XMM%u", 16, 31))>;
def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, RSP,
@@ -804,3 +876,6 @@ def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64,
def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15,
(sequence "ZMM%u", 16, 31),
K4, K5, K6, K7)>;
+
+// Only R12 is preserved for PHP calls in HHVM.
+def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
diff --git a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 6a5a28e..a09d065 100644
--- a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -19,9 +19,10 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
-#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/GlobalValue.h"
using namespace llvm;
@@ -141,6 +142,24 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// The EH_RETURN pseudo is really removed during the MC Lowering.
return true;
}
+ case X86::IRET: {
+ // Adjust stack to erase error code
+ int64_t StackAdj = MBBI->getOperand(0).getImm();
+ X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true);
+ // Replace pseudo with machine iret
+ BuildMI(MBB, MBBI, DL,
+ TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32));
+ MBB.erase(MBBI);
+ return true;
+ }
+ case X86::EH_RESTORE: {
+ // Restore ESP and EBP, and optionally ESI if required.
+ bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(
+ MBB.getParent()->getFunction()->getPersonalityFn()));
+ X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH);
+ MBBI->eraseFromParent();
+ return true;
+ }
}
llvm_unreachable("Previous switch has a fallthrough?");
}
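
The IRET pseudo, together with the CC_X86_32_Intr / CC_X86_64_Intr conventions added in X86CallingConv.td, forms the backend half of x86 interrupt-handler support. With a front end that exposes it, a handler looks roughly like the sketch below; the attribute spelling and the frame type are assumptions about the usual GCC/Clang interface, not something defined in this patch:

// Rough sketch, assuming front-end support for the x86 'interrupt' attribute:
// the handler takes a pointer to the interrupt frame (the exception variant
// also takes an error code) and returns with iretl/iretq instead of ret,
// which is what the IRET pseudo above expands to.
struct InterruptFrame;  // layout defined by hardware/ABI; left opaque here

__attribute__((interrupt))
void timer_isr(InterruptFrame *frame) {
  // acknowledge the device, do minimal work, return via iret
}
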
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index b4319c8..de94a13 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -298,8 +298,8 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
return false;
// Make sure nothing is in the way
- BasicBlock::const_iterator Start = I;
- BasicBlock::const_iterator End = II;
+ BasicBlock::const_iterator Start(I);
+ BasicBlock::const_iterator End(II);
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
// We only expect extractvalue instructions between the intrinsic and the
// instruction to be selected.
@@ -433,6 +433,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasSSE4A = Subtarget->hasSSE4A();
+ bool HasAVX = Subtarget->hasAVX();
+ bool IsNonTemporal = MMO && MMO->isNonTemporal();
+
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -449,35 +454,59 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
// FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
- case MVT::i32: Opc = X86::MOV32mr; break;
- case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
+ case MVT::i32:
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
+ break;
+ case MVT::i64:
+ // Must be in x86-64 mode.
+ Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
+ break;
case MVT::f32:
- Opc = X86ScalarSSEf32 ?
- (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
+ if (X86ScalarSSEf32) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSS;
+ else
+ Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+ } else
+ Opc = X86::ST_Fp32m;
break;
case MVT::f64:
- Opc = X86ScalarSSEf64 ?
- (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
+ if (X86ScalarSSEf32) {
+ if (IsNonTemporal && HasSSE4A)
+ Opc = X86::MOVNTSD;
+ else
+ Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+ } else
+ Opc = X86::ST_Fp64m;
break;
case MVT::v4f32:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
- else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+ else
+ Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ } else
+ Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (Aligned)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
- else
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+ else
+ Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ } else
Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
}
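
These new opcode choices map the non-temporal hint on the store's memory operand to the streaming-store forms (MOVNTI, MOVNTPS/MOVNTPD, MOVNTDQ, and the SSE4A scalar variants). At the source level the hint usually comes from the streaming intrinsics or !nontemporal metadata; a small intrinsic-based example, illustrative only:

#include <immintrin.h>

// _mm_stream_ps requests a non-temporal store, which is the kind of access
// the aligned v4f32 case above now lowers to MOVNTPS (VMOVNTPS with AVX)
// instead of a regular MOVAPS.
void stream_fill(float *dst, __m128 v, int n) {
  for (int i = 0; i + 4 <= n; i += 4)
    _mm_stream_ps(dst + i, v);  // dst must be 16-byte aligned
  _mm_sfence();                 // order the streaming stores before later use
}
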
@@ -1069,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
- // The x86-64 ABI for returning structs by value requires that we copy
- // the sret argument into %rax for the return. We saved the argument into
- // a virtual register in the entry block, so now we copy the value out
- // and into %rax. We also do the same with %eax for Win32.
- if (F.hasStructRetAttr() &&
- (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
+ // All x86 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // We saved the argument into a virtual register in the entry block,
+ // so now we copy the value out and into %rax/%eax.
+ if (F.hasStructRetAttr()) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -1431,17 +1459,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
.addMBB(TrueMBB);
}
- // Obtain the branch weight and add the TrueBB to the successor list.
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
-
- // Emits an unconditional branch to the FalseBB, obtains the branch
- // weight, and adds it to the successor list.
- fastEmitBranch(FalseMBB, DbgLoc);
-
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1472,12 +1490,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
}
@@ -1492,12 +1506,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
@@ -1511,12 +1520,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
.addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
.addMBB(TrueMBB);
- fastEmitBranch(FalseMBB, DbgLoc);
- uint32_t BranchWeight = 0;
- if (FuncInfo.BPI)
- BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
- TrueMBB->getBasicBlock());
- FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
return true;
}
@@ -1945,6 +1949,9 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned ResultReg;
if (Subtarget->hasAVX()) {
+ const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
+
// If we have AVX, create 1 blendv instead of 3 logic instructions.
// Blendv was introduced with SSE 4.1, but the 2 register form implicitly
// uses XMM0 as the selection register. That may need just as many
@@ -1955,10 +1962,13 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
unsigned BlendOpcode =
(RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
- LHSReg, LHSIsKill, CmpReg, true);
+ unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill, CmpReg, true);
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
@@ -2806,10 +2816,12 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::HiPE)
return 0;
- if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
- return 0;
- if (CS && CS->paramHasAttr(1, Attribute::InReg))
- return 0;
+
+ if (CS)
+ if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
+ CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+ return 0;
+
return 4;
}
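
The 4 bytes accounted for here are the hidden sret pointer that a 32-bit callee pops on return; the exceptions enumerated above (fastcc/GHC/HiPE, an inreg sret, the MCU target, or an empty argument list) make the callee pop nothing. A minimal illustration; the "ret $4" note describes the usual i386 SysV behaviour rather than anything emitted by this file:

// Returning a struct by value on 32-bit x86 passes a hidden pointer to the
// result slot, and the callee pops that pointer ("ret $4"), so the caller's
// stack bookkeeping must count 4 callee-popped bytes for this call.
struct Big {
  int a, b, c;
};

Big make() {
  return Big{1, 2, 3};
}
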
@@ -2924,7 +2936,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -3020,8 +3032,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
- ArgVT.getStoreSize(), Alignment);
+ MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset),
+ MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
if (Flags.isByVal()) {
X86AddressMode SrcAM;
SrcAM.Base.Reg = ArgReg;
@@ -3252,6 +3264,30 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
updateValueMap(I, Reg);
return true;
}
+ case Instruction::BitCast: {
+ // Select SSE2/AVX bitcasts between 128/256 bit vector types.
+ if (!Subtarget->hasSSE2())
+ return false;
+
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+
+ if (!SrcVT.isSimple() || !DstVT.isSimple())
+ return false;
+
+ if (!SrcVT.is128BitVector() &&
+ !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
+ return false;
+
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (Reg == 0)
+ return false;
+
+ // No instruction is needed for conversion. Reuse the register used by
+ // the first operand.
+ updateValueMap(I, Reg);
+ return true;
+ }
}
return false;
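
A bitcast between same-width vector types is a pure reinterpretation, so the new case simply forwards the source register. The vector cast intrinsics are the usual source-level counterpart; a one-line illustration:

#include <immintrin.h>

// Illustrative only: _mm_castps_si128 is a 128-bit vector bitcast. No
// instruction is needed; the value stays in the same XMM register, which is
// exactly what the new BitCast case achieves by reusing the register.
__m128i as_ints(__m128 v) {
  return _mm_castps_si128(v);
}
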
@@ -3384,8 +3420,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
TII.get(Opc), ResultReg);
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- DL.getPointerSize(), Align);
+ MachinePointerInfo::getConstantPool(*FuncInfo.MF),
+ MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 5eb4fae..1dd69e8 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -9,6 +9,7 @@
//
// This file defines the pass that finds instructions that can be
// re-written as LEA instructions in order to reduce pipeline delays.
+// When optimizing for size it replaces suitable LEAs with INC or DEC.
//
//===----------------------------------------------------------------------===//
@@ -61,6 +62,11 @@ class FixupLEAPass : public MachineFunctionPass {
void processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI);
+ /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg
+ /// and convert them to INC or DEC respectively.
+ bool fixupIncDec(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) const;
+
/// \brief Determine if an instruction references a machine register
/// and, if so, whether it reads or writes the register.
RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
@@ -89,6 +95,8 @@ public:
private:
MachineFunction *MF;
const X86InstrInfo *TII; // Machine instruction info.
+ bool OptIncDec;
+ bool OptLEA;
};
char FixupLEAPass::ID = 0;
}
@@ -150,7 +158,10 @@ FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
MF = &Func;
const X86Subtarget &ST = Func.getSubtarget<X86Subtarget>();
- if (!ST.LEAusesAG() && !ST.slowLEA())
+ OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize();
+ OptLEA = ST.LEAusesAG() || ST.slowLEA();
+
+ if (!OptLEA && !OptIncDec)
return false;
TII = ST.getInstrInfo();
@@ -187,7 +198,7 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
if (I == MFI->begin()) {
- if (MFI->isPredecessor(MFI)) {
+ if (MFI->isPredecessor(&*MFI)) {
I = --MFI->end();
return true;
} else
@@ -222,6 +233,60 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
return nullptr;
}
+static inline bool isLEA(const int opcode) {
+ return opcode == X86::LEA16r || opcode == X86::LEA32r ||
+ opcode == X86::LEA64r || opcode == X86::LEA64_32r;
+}
+
+/// isLEASimpleIncOrDec - Does this LEA have one of these forms:
+/// lea %reg, 1(%reg)
+/// lea %reg, -1(%reg)
+static inline bool isLEASimpleIncOrDec(MachineInstr *LEA) {
+ unsigned SrcReg = LEA->getOperand(1 + X86::AddrBaseReg).getReg();
+ unsigned DstReg = LEA->getOperand(0).getReg();
+ unsigned AddrDispOp = 1 + X86::AddrDisp;
+ return SrcReg == DstReg &&
+ LEA->getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
+ LEA->getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
+ LEA->getOperand(AddrDispOp).isImm() &&
+ (LEA->getOperand(AddrDispOp).getImm() == 1 ||
+ LEA->getOperand(AddrDispOp).getImm() == -1);
+}
+
+bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) const {
+ MachineInstr *MI = I;
+ int Opcode = MI->getOpcode();
+ if (!isLEA(Opcode))
+ return false;
+
+ if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) {
+ int NewOpcode;
+ bool isINC = MI->getOperand(4).getImm() == 1;
+ switch (Opcode) {
+ case X86::LEA16r:
+ NewOpcode = isINC ? X86::INC16r : X86::DEC16r;
+ break;
+ case X86::LEA32r:
+ case X86::LEA64_32r:
+ NewOpcode = isINC ? X86::INC32r : X86::DEC32r;
+ break;
+ case X86::LEA64r:
+ NewOpcode = isINC ? X86::INC64r : X86::DEC64r;
+ break;
+ }
+
+ MachineInstr *NewMI =
+ BuildMI(*MFI, I, MI->getDebugLoc(), TII->get(NewOpcode))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
+ MFI->erase(I);
+ I = static_cast<MachineBasicBlock::iterator>(NewMI);
+ return true;
+ }
+ return false;
+}
+
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
// Process a load, store, or LEA instruction.
@@ -265,8 +330,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
MachineInstr *MI = I;
const int opcode = MI->getOpcode();
- if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r &&
- opcode != X86::LEA64_32r)
+ if (!isLEA(opcode))
return;
if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() ||
!TII->isSafeToClobberEFLAGS(*MFI, I))
@@ -280,7 +344,8 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
return;
int addrr_opcode, addri_opcode;
switch (opcode) {
- default: llvm_unreachable("Unexpected LEA instruction");
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
case X86::LEA16r:
addrr_opcode = X86::ADD16rr;
addri_opcode = X86::ADD16ri;
@@ -330,10 +395,16 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
MachineFunction::iterator MFI) {
for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
- if (MF.getSubtarget<X86Subtarget>().isSLM())
- processInstructionForSLM(I, MFI);
- else
- processInstruction(I, MFI);
+ if (OptIncDec)
+ if (fixupIncDec(I, MFI))
+ continue;
+
+ if (OptLEA) {
+ if (MF.getSubtarget<X86Subtarget>().isSLM())
+ processInstructionForSLM(I, MFI);
+ else
+ processInstruction(I, MFI);
+ }
}
return false;
}
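
A standalone restatement of the pattern fixupIncDec matches: destination equals base, no index or segment register, and a displacement of +1 or -1. The struct below is an illustrative stand-in, not the MachineInstr interface:

#include <cstdint>
#include <iostream>

// Flat view of an LEA's operands for illustration; register 0 means "none".
struct LEAOperandsSketch {
  unsigned Dst, Base, Index, Segment;
  int64_t Disp;
};

// Matches "lea 1(%reg), %reg" / "lea -1(%reg), %reg", which INC/DEC can
// replace when EFLAGS is free to clobber and either INC/DEC is not slow on
// the target or we are optimizing for minimum size.
static bool isSimpleIncOrDec(const LEAOperandsSketch &L) {
  return L.Dst == L.Base && L.Index == 0 && L.Segment == 0 &&
         (L.Disp == 1 || L.Disp == -1);
}

int main() {
  LEAOperandsSketch Inc{1, 1, 0, 0, +1};  // lea 1(%reg), %reg  -> inc %reg
  LEAOperandsSketch Addr{1, 2, 3, 0, 8};  // genuine address arithmetic
  std::cout << isSimpleIncOrDec(Inc) << ' ' << isSimpleIncOrDec(Addr) << '\n';
}
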
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 40b9c8a..97bb8ab 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -120,12 +120,10 @@ namespace {
// Return a bitmask of FP registers in block's live-in list.
static unsigned calcLiveInMask(MachineBasicBlock *MBB) {
unsigned Mask = 0;
- for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
- E = MBB->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- if (Reg < X86::FP0 || Reg > X86::FP6)
+ for (const auto &LI : MBB->liveins()) {
+ if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6)
continue;
- Mask |= 1 << (Reg - X86::FP0);
+ Mask |= 1 << (LI.PhysReg - X86::FP0);
}
return Mask;
}
@@ -301,8 +299,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
bool FPIsUsed = false;
static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; i <= 6; ++i)
- if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
FPIsUsed = true;
break;
}
@@ -321,7 +320,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Process the function in depth first order so that we process at least one
// of the predecessors for every reachable block in the function.
SmallPtrSet<MachineBasicBlock*, 8> Processed;
- MachineBasicBlock *Entry = MF.begin();
+ MachineBasicBlock *Entry = &MF.front();
bool Changed = false;
for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
@@ -329,9 +328,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Process any unreachable blocks in arbitrary order now.
if (MF.size() != Processed.size())
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- if (Processed.insert(BB).second)
- Changed |= processBasicBlock(MF, *BB);
+ for (MachineBasicBlock &BB : MF)
+ if (Processed.insert(&BB).second)
+ Changed |= processBasicBlock(MF, BB);
LiveBundles.clear();
@@ -348,13 +347,12 @@ void FPS::bundleCFG(MachineFunction &MF) {
LiveBundles.resize(Bundles->getNumBundles());
// Gather the actual live-in masks for all MBBs.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = I;
- const unsigned Mask = calcLiveInMask(MBB);
+ for (MachineBasicBlock &MBB : MF) {
+ const unsigned Mask = calcLiveInMask(&MBB);
if (!Mask)
continue;
// Update MBB ingoing bundle mask.
- LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask;
+ LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask;
}
}
@@ -546,17 +544,9 @@ namespace {
};
}
-#ifndef NDEBUG
-static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
- for (unsigned i = 0; i != NumEntries-1; ++i)
- if (!(Table[i] < Table[i+1])) return false;
- return true;
-}
-#endif
-
-static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
- const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
- if (I != Table+N && I->from == Opcode)
+static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) {
+ const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode);
+ if (I != Table.end() && I->from == Opcode)
return I->to;
return -1;
}
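
The rewritten Lookup relies on the opcode tables being sorted by their 'from' field, so std::lower_bound can binary-search them and std::is_sorted (in the new ASSERT_SORTED) can check the precondition. A self-contained sketch of the same pattern with made-up opcodes:

#include <algorithm>
#include <cassert>
#include <iostream>

struct TableEntrySketch {
  unsigned from;
  unsigned to;
  bool operator<(const TableEntrySketch &RHS) const { return from < RHS.from; }
  bool operator<(unsigned Opcode) const { return from < Opcode; }
};

// Binary search for 'Opcode'; returns the mapped value or -1 if absent.
static int lookup(const TableEntrySketch *Begin, const TableEntrySketch *End,
                  unsigned Opcode) {
  const TableEntrySketch *I = std::lower_bound(Begin, End, Opcode);
  if (I != End && I->from == Opcode)
    return static_cast<int>(I->to);
  return -1;
}

int main() {
  static const TableEntrySketch Table[] = {{10, 100}, {20, 200}, {35, 300}};
  assert(std::is_sorted(std::begin(Table), std::end(Table)));
  std::cout << lookup(std::begin(Table), std::end(Table), 20) << '\n';  // 200
  std::cout << lookup(std::begin(Table), std::end(Table), 21) << '\n';  // -1
}
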
@@ -567,7 +557,7 @@ static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
#define ASSERT_SORTED(TABLE) \
{ static bool TABLE##Checked = false; \
if (!TABLE##Checked) { \
- assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
+ assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) && \
"All lookup tables must be sorted for efficient access!"); \
TABLE##Checked = true; \
} \
@@ -746,7 +736,7 @@ static const TableEntry OpcodeTable[] = {
static unsigned getConcreteOpcode(unsigned Opcode) {
ASSERT_SORTED(OpcodeTable);
- int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
+ int Opc = Lookup(OpcodeTable, Opcode);
assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
return Opc;
}
@@ -797,7 +787,7 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
RegMap[Stack[--StackTop]] = ~0; // Update state
// Check to see if there is a popping version of this instruction...
- int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode());
+ int Opcode = Lookup(PopTable, I->getOpcode());
if (Opcode != -1) {
I->setDesc(TII->get(Opcode));
if (Opcode == X86::UCOM_FPPr)
@@ -1193,7 +1183,7 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
// We decide which form to use based on what is on the top of the stack, and
// which operand is killed by this instruction.
- const TableEntry *InstTable;
+ ArrayRef<TableEntry> InstTable;
bool isForward = TOS == Op0;
bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0);
if (updateST0) {
@@ -1208,8 +1198,7 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) {
InstTable = ReverseSTiTable;
}
- int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table),
- MI->getOpcode());
+ int Opcode = Lookup(InstTable, MI->getOpcode());
assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!");
// NotTOS - The register which is not on the top of stack...
@@ -1520,31 +1509,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
return;
}
- case X86::WIN_FTOL_32:
- case X86::WIN_FTOL_64: {
- // Push the operand into ST0.
- MachineOperand &Op = MI->getOperand(0);
- assert(Op.isUse() && Op.isReg() &&
- Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
- unsigned FPReg = getFPReg(Op);
- if (Op.isKill())
- moveToTop(FPReg, Inst);
- else
- duplicateToTop(FPReg, ScratchFPReg, Inst);
-
- // Emit the call. This will pop the operand.
- BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
- .addExternalSymbol("_ftol2")
- .addReg(X86::ST0, RegState::ImplicitKill)
- .addReg(X86::ECX, RegState::ImplicitDefine)
- .addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::EDX, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- --StackTop;
-
- break;
- }
-
case X86::RETQ:
case X86::RETL:
case X86::RETIL:
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 3a21b57..242d0333 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -18,25 +18,23 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include <cstdlib>
using namespace llvm;
-// FIXME: completely move here.
-extern cl::opt<bool> ForceStackAlign;
-
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
unsigned StackAlignOverride)
: TargetFrameLowering(StackGrowsDown, StackAlignOverride,
@@ -80,6 +78,27 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
+/// usesTheStack - This function checks if any of the users of EFLAGS
+/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
+/// to use the stack, and if we don't adjust the stack we clobber the first
+/// frame index.
+/// See X86InstrInfo::copyPhysReg.
+static bool usesTheStack(const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Conservatively assume that inline assembly might use the stack.
+ if (MF.hasInlineAsm())
+ return true;
+
+ return any_of(MRI.reg_instructions(X86::EFLAGS),
+ [](const MachineInstr &RI) { return RI.isCopy(); });
+}
+
+static bool doesStackUseImplyFP(const MachineFunction &MF) {
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ return IsWin64Prologue && usesTheStack(MF);
+}
+
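The usesTheStack rewrite replaces an explicit iterator loop with a range predicate and a lambda. A standalone sketch of the same pattern using std::any_of (Instr is a made-up stand-in, not MachineInstr; llvm::any_of is a range-taking wrapper around the same algorithm):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Instr {
      bool IsCopy;
      bool isCopy() const { return IsCopy; }
    };

    // Returns true if any user of EFLAGS (modelled as a plain vector) is a copy.
    static bool anyEFLAGSCopy(const std::vector<Instr> &UsersOfEFLAGS) {
      return std::any_of(UsersOfEFLAGS.begin(), UsersOfEFLAGS.end(),
                         [](const Instr &RI) { return RI.isCopy(); });
    }

    int main() {
      std::vector<Instr> Users = {{false}, {true}};
      std::printf("%d\n", anyEFLAGSCopy(Users)); // Prints 1.
      return 0;
    }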
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -92,8 +111,9 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit() || MMI.callsEHReturn() ||
- MFI->hasStackMap() || MFI->hasPatchPoint());
+ MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
+ MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ doesStackUseImplyFP(MF));
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -148,21 +168,14 @@ static unsigned getLEArOpcode(unsigned IsLP64) {
/// to this register without worry about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const TargetRegisterInfo *TRI,
+ const X86RegisterInfo *TRI,
bool Is64Bit) {
const MachineFunction *MF = MBB.getParent();
const Function *F = MF->getFunction();
if (!F || MF->getMMI().callsEHReturn())
return 0;
- static const uint16_t CallerSavedRegs32Bit[] = {
- X86::EAX, X86::EDX, X86::ECX, 0
- };
-
- static const uint16_t CallerSavedRegs64Bit[] = {
- X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
- X86::R8, X86::R9, X86::R10, X86::R11, 0
- };
+ const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);
unsigned Opc = MBBI->getOpcode();
switch (Opc) {
@@ -191,10 +204,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
Uses.insert(*AI);
}
- const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
- for (; *CS; ++CS)
- if (!Uses.count(*CS))
- return *CS;
+ for (auto CS : AvailableRegs)
+ if (!Uses.count(CS) && CS != X86::RIP)
+ return CS;
}
}
@@ -214,8 +226,12 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
-/// Check whether or not the terminators of \p MBB needs to read EFLAGS.
-static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
+/// Check if the flags need to be preserved before the terminators.
+/// This is the case if EFLAGS is live-in to the region formed by the
+/// terminators, or live-out of that region without being defined by a
+/// terminator.
+static bool
+flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
for (const MachineInstr &MI : MBB.terminators()) {
bool BreakNext = false;
for (const MachineOperand &MO : MI.operands()) {
@@ -225,15 +241,27 @@ static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) {
if (Reg != X86::EFLAGS)
continue;
- // This terminator needs an eflag that is not defined
- // by a previous terminator.
+ // This terminator needs an EFLAGS value that is not defined
+ // by a previous terminator:
+ // EFLAGS is live-in of the region composed by the terminators.
if (!MO.isDef())
return true;
+ // This terminator defines the eflags, i.e., we don't need to preserve it.
+ // However, we still need to check that this specific terminator does
+ // not read a live-in value.
BreakNext = true;
}
+ // We found a definition of the eflags, no need to preserve them.
if (BreakNext)
- break;
+ return false;
}
+
+ // None of the terminators use or define the eflags.
+ // Check if they are live-out; that would imply we need to preserve them.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
+ return true;
+
return false;
}
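A minimal standalone model of the decision implemented above, with made-up Term records instead of MachineInstrs and a single boolean for successor live-ins; it illustrates only the control flow, not the operand iteration:

    #include <cstdio>
    #include <vector>

    struct Term {
      bool ReadsFlags;
      bool DefsFlags;
    };

    static bool flagsNeedToBePreserved(const std::vector<Term> &Terminators,
                                       bool FlagsLiveIntoSomeSuccessor) {
      for (const Term &T : Terminators) {
        if (T.ReadsFlags)
          return true;  // EFLAGS is live-in to the terminator region.
        if (T.DefsFlags)
          return false; // Defined before any read: nothing to preserve.
      }
      // No terminator uses or defines the flags; preserve them only if a
      // successor expects them live.
      return FlagsLiveIntoSomeSuccessor;
    }

    int main() {
      std::printf("%d %d\n",
                  flagsNeedToBePreserved({{false, true}, {true, false}}, true),
                  flagsNeedToBePreserved({{false, false}}, true)); // Prints 0 1
      return 0;
    }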
@@ -289,6 +317,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
if (isSub)
MI->setFlag(MachineInstr::FrameSetup);
+ else
+ MI->setFlag(MachineInstr::FrameDestroy);
Offset -= ThisVal;
continue;
}
@@ -298,6 +328,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue);
if (isSub)
MI.setMIFlag(MachineInstr::FrameSetup);
+ else
+ MI.setMIFlag(MachineInstr::FrameDestroy);
Offset -= ThisVal;
}
@@ -312,7 +344,11 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
// is tricky.
bool UseLEA;
if (!InEpilogue) {
- UseLEA = STI.useLeaForSP();
+ // Check if inserting the prologue at the beginning
+ // of MBB would require the use of LEA operations.
+ // We need to use LEA operations if EFLAGS is live in, because
+ // it means an instruction will read it before it gets defined.
+ UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
} else {
// If we can use LEA for SP but we shouldn't, check that none
// of the terminators uses the eflags. Otherwise we will insert
@@ -321,10 +357,10 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
// and is an optimization anyway.
UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
if (UseLEA && !STI.useLeaForSP())
- UseLEA = terminatorsNeedFlagsAsInput(MBB);
+ UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
// If that assert breaks, that means we do not do the right thing
// in canUseAsEpilogue.
- assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) &&
+ assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
"We shouldn't have allowed this insertion point");
}
@@ -347,30 +383,6 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
return MI;
}
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
-static
-void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = nullptr) {
- if (MBBI == MBB.begin()) return;
-
- MachineBasicBlock::iterator PI = std::prev(MBBI);
- unsigned Opc = PI->getOpcode();
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
- Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes += PI->getOperand(2).getImm();
- MBB.erase(PI);
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
- PI->getOperand(0).getReg() == StackPtr) {
- if (NumBytes)
- *NumBytes -= PI->getOperand(2).getImm();
- MBB.erase(PI);
- }
-}
-
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
bool doMergeWithPrevious) const {
@@ -436,27 +448,265 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
}
}
-/// usesTheStack - This function checks if any of the users of EFLAGS
-/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
-/// to use the stack, and if we don't adjust the stack we clobber the first
-/// frame index.
-/// See X86InstrInfo::copyPhysReg.
-static bool usesTheStack(const MachineFunction &MF) {
- const MachineRegisterInfo &MRI = MF.getRegInfo();
+MachineInstr *X86FrameLowering::emitStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL,
+ bool InProlog) const {
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ if (STI.isTargetWindowsCoreCLR()) {
+ if (InProlog) {
+ return emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
+ } else {
+ return emitStackProbeInline(MF, MBB, MBBI, DL, false);
+ }
+ } else {
+ return emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
+ }
+}
- for (MachineRegisterInfo::reg_instr_iterator
- ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
- ri != re; ++ri)
- if (ri->isCopy())
- return true;
+void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const {
+ const StringRef ChkStkStubSymbol = "__chkstk_stub";
+ MachineInstr *ChkStkStub = nullptr;
- return false;
+ for (MachineInstr &MI : PrologMBB) {
+ if (MI.isCall() && MI.getOperand(0).isSymbol() &&
+ ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) {
+ ChkStkStub = &MI;
+ break;
+ }
+ }
+
+ if (ChkStkStub != nullptr) {
+ MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
+ assert(std::prev(MBBI).operator==(ChkStkStub) &&
+ "MBBI expected after __chkstk_stub.");
+ DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
+ emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
+ ChkStkStub->eraseFromParent();
+ }
}
-void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc DL) const {
+MachineInstr *X86FrameLowering::emitStackProbeInline(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ assert(STI.is64Bit() && "different expansion needed for 32 bit");
+ assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+
+ // RAX contains the number of bytes of desired stack adjustment.
+ // The handling here assumes this value has already been updated so as to
+ // maintain stack alignment.
+ //
+ // We need to exit with RSP modified by this amount and execute suitable
+ // page touches to notify the OS that we're growing the stack responsibly.
+ // All stack probing must be done without modifying RSP.
+ //
+ // MBB:
+ // SizeReg = RAX;
+ // ZeroReg = 0
+ // CopyReg = RSP
+ // Flags, TestReg = CopyReg - SizeReg
+ // FinalReg = !Flags.Ovf ? TestReg : ZeroReg
+ // LimitReg = gs magic thread env access
+ // if FinalReg >= LimitReg goto ContinueMBB
+ // RoundBB:
+ // RoundReg = page address of FinalReg
+ // LoopMBB:
+ // LoopReg = PHI(LimitReg,ProbeReg)
+ // ProbeReg = LoopReg - PageSize
+ // [ProbeReg] = 0
+ // if (ProbeReg > RoundReg) goto LoopMBB
+ // ContinueMBB:
+ // RSP = RSP - RAX
+ // [rest of original MBB]
+
+ // Set up the new basic blocks
+ MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
+ MF.insert(MBBIter, RoundMBB);
+ MF.insert(MBBIter, LoopMBB);
+ MF.insert(MBBIter, ContinueMBB);
+
+ // Split MBB and move the tail portion down to ContinueMBB.
+ MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
+ ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
+ ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+
+ // Some useful constants
+ const int64_t ThreadEnvironmentStackLimit = 0x10;
+ const int64_t PageSize = 0x1000;
+ const int64_t PageMask = ~(PageSize - 1);
+
+ // Registers we need. For the normal case we use virtual
+ // registers. For the prolog expansion we use RAX, RCX and RDX.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *RegClass = &X86::GR64RegClass;
+ const unsigned SizeReg = InProlog ? (unsigned)X86::RAX
+ : MRI.createVirtualRegister(RegClass),
+ ZeroReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass),
+ CopyReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ TestReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ FinalReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ RoundedReg = InProlog ? (unsigned)X86::RDX
+ : MRI.createVirtualRegister(RegClass),
+ LimitReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass),
+ JoinReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass),
+ ProbeReg = InProlog ? (unsigned)X86::RCX
+ : MRI.createVirtualRegister(RegClass);
+
+ // SP-relative offsets where we can save RCX and RDX.
+ int64_t RCXShadowSlot = 0;
+ int64_t RDXShadowSlot = 0;
+
+ // If inlining in the prolog, save RCX and RDX.
+ // Future optimization: don't save or restore if not live in.
+ if (InProlog) {
+ // Compute the offsets. We need to account for things already
+ // pushed onto the stack at this point: return address, frame
+ // pointer (if used), and callee saves.
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
+ const bool HasFP = hasFP(MF);
+ RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
+ RDXShadowSlot = RCXShadowSlot + 8;
+ // Emit the saves.
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RCXShadowSlot)
+ .addReg(X86::RCX);
+ addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
+ RDXShadowSlot)
+ .addReg(X86::RDX);
+ } else {
+ // Not in the prolog. Copy RAX to a virtual reg.
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
+ }
+
+ // Add code to MBB to check for overflow and set the new target stack pointer
+ // to zero if so.
+ BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
+ .addReg(ZeroReg, RegState::Undef)
+ .addReg(ZeroReg, RegState::Undef);
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
+ BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
+ .addReg(CopyReg)
+ .addReg(SizeReg);
+ BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg)
+ .addReg(TestReg)
+ .addReg(ZeroReg);
+
+ // FinalReg now holds final stack pointer value, or zero if
+ // allocation would overflow. Compare against the current stack
+ // limit from the thread environment block. Note this limit is the
+ // lowest touched page on the stack, not the point at which the OS
+ // will cause an overflow exception, so this is just an optimization
+ // to avoid unnecessarily touching pages that are below the current
+ // SP but already committed to the stack by the OS.
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
+ .addReg(0)
+ .addImm(1)
+ .addReg(0)
+ .addImm(ThreadEnvironmentStackLimit)
+ .addReg(X86::GS);
+ BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
+ // Jump if the desired stack pointer is at or above the stack limit.
+ BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);
+
+ // Add code to roundMBB to round the final stack pointer to a page boundary.
+ BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
+ .addReg(FinalReg)
+ .addImm(PageMask);
+ BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);
+
+ // LimitReg now holds the current stack limit, RoundedReg page-rounded
+ // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
+ // and probe until we reach RoundedReg.
+ if (!InProlog) {
+ BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
+ .addReg(LimitReg)
+ .addMBB(RoundMBB)
+ .addReg(ProbeReg)
+ .addMBB(LoopMBB);
+ }
+
+ addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
+ false, -PageSize);
+
+ // Probe by storing a byte onto the stack.
+ BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
+ .addReg(ProbeReg)
+ .addImm(1)
+ .addReg(0)
+ .addImm(0)
+ .addReg(0)
+ .addImm(0);
+ BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
+ .addReg(RoundedReg)
+ .addReg(ProbeReg);
+ BuildMI(LoopMBB, DL, TII.get(X86::JNE_1)).addMBB(LoopMBB);
+
+ MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();
+
+ // If in prolog, restore RDX and RCX.
+ if (InProlog) {
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
+ X86::RCX),
+ X86::RSP, false, RCXShadowSlot);
+ addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
+ X86::RDX),
+ X86::RSP, false, RDXShadowSlot);
+ }
+
+ // Now that the probing is done, add code to continueMBB to update
+ // the stack pointer for real.
+ BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(SizeReg);
+
+ // Add the control flow edges we need.
+ MBB.addSuccessor(ContinueMBB);
+ MBB.addSuccessor(RoundMBB);
+ RoundMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(ContinueMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+
+ // Mark all the instructions added to the prolog as frame setup.
+ if (InProlog) {
+ for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
+ BeforeMBBI->setFlag(MachineInstr::FrameSetup);
+ }
+ for (MachineInstr &MI : *RoundMBB) {
+ MI.setFlag(MachineInstr::FrameSetup);
+ }
+ for (MachineInstr &MI : *LoopMBB) {
+ MI.setFlag(MachineInstr::FrameSetup);
+ }
+ for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
+ CMBBI != ContinueMBBI; ++CMBBI) {
+ CMBBI->setFlag(MachineInstr::FrameSetup);
+ }
+ }
+
+ // Possible TODO: physreg liveness for InProlog case.
+
+ return ContinueMBBI;
+}
+
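Stripped of the MachineInstr plumbing, the probe sequence built above is simple pointer arithmetic. A standalone sketch with made-up addresses, omitting the overflow/CMOV handling, and assuming the stack limit read from the thread environment block is page-aligned so the loop terminates:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int64_t PageSize = 0x1000;
      const int64_t PageMask = ~(PageSize - 1);

      int64_t RSP = 0x7fff0000;   // current stack pointer
      int64_t RAX = 0x6000;       // requested allocation size
      int64_t Limit = 0x7ffec000; // lowest committed page (thread environment)

      int64_t Final = RSP - RAX;  // desired new stack pointer
      if (Final < Limit) {
        int64_t Rounded = Final & PageMask; // page containing the target SP
        int64_t Probe = Limit;
        do {
          Probe -= PageSize;
          std::printf("touch byte at 0x%llx\n", (long long)Probe); // [Probe] = 0
        } while (Probe != Rounded);
      }
      std::printf("new RSP = 0x%llx\n", (long long)Final);
      return 0;
    }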
+MachineInstr *X86FrameLowering::emitStackProbeCall(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
unsigned CallOp;
@@ -478,6 +728,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
Symbol = "_chkstk";
MachineInstrBuilder CI;
+ MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);
// All current stack probes take AX and SP as input, clobber flags, and
// preserve all registers. x86_64 probes leave RSP unmodified.
@@ -507,6 +758,26 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
.addReg(X86::RSP)
.addReg(X86::RAX);
}
+
+ if (InProlog) {
+ // Apply the frame setup flag to all inserted instrs.
+ for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
+ ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
+ }
+
+ return MBBI;
+}
+
+MachineInstr *X86FrameLowering::emitStackProbeInlineStub(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
+
+ assert(InProlog && "ChkStkStub called outside prolog!");
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("__chkstk_stub");
+
+ return MBBI;
}
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
@@ -526,7 +797,7 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
const MachineFrameInfo *MFI = MF.getFrameInfo();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
unsigned StackAlign = getStackAlignment();
- if (ForceStackAlign) {
+ if (MF.getFunction()->hasFnAttribute("stackrealign")) {
if (MFI->hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
else if (MaxAlign < SlotSize)
@@ -537,15 +808,14 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- DebugLoc DL,
+ DebugLoc DL, unsigned Reg,
uint64_t MaxAlign) const {
uint64_t Val = -MaxAlign;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
- StackPtr)
- .addReg(StackPtr)
- .addImm(Val)
- .setMIFlag(MachineInstr::FrameSetup);
+ unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
+ .addReg(Reg)
+ .addImm(Val)
+ .setMIFlag(MachineInstr::FrameSetup);
// The EFLAGS implicit def is dead.
MI->getOperand(3).setIsDead();
@@ -646,6 +916,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ bool IsFunclet = MBB.isEHFuncletEntry();
+ EHPersonality Personality = EHPersonality::Unknown;
+ if (Fn->hasPersonalityFn())
+ Personality = classifyEHPersonality(Fn->getPersonalityFn());
+ bool FnHasClrFunclet =
+ MMI.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
+ bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
bool HasFP = hasFP(MF);
bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
@@ -655,9 +932,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned FramePtr = TRI->getFrameRegister(MF);
const unsigned MachineFramePtr =
STI.isTarget64BitILP32()
- ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
- : FramePtr;
+ ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
unsigned BasePtr = TRI->getBaseRegister();
+
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
DebugLoc DL;
// Add RETADDR move area to callee saved frame size.
@@ -723,6 +1002,24 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
uint64_t NumBytes = 0;
int stackGrowth = -SlotSize;
+ // Find the funclet establisher parameter
+ unsigned Establisher = X86::NoRegister;
+ if (IsClrFunclet)
+ Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
+ else if (IsFunclet)
+ Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;
+
+ if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
+ // Immediately spill establisher into the home slot.
+ // The runtime cares about this.
+ // MOV64mr %rdx, 16(%rsp)
+ unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
+ .addReg(Establisher)
+ .setMIFlag(MachineInstr::FrameSetup);
+ MBB.addLiveIn(Establisher);
+ }
+
if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -739,7 +1036,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Get the offset of the stack slot for the EBP register, which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
// Update the frame offset adjustment.
- MFI->setOffsetAdjustment(-NumBytes);
+ if (!IsFunclet)
+ MFI->setOffsetAdjustment(-NumBytes);
+ else
+ assert(MFI->getOffsetAdjustment() == -(int)NumBytes &&
+ "should calculate same local variable offset for funclets");
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
@@ -765,35 +1066,46 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
- if (!IsWin64Prologue) {
+ if (!IsWin64Prologue && !IsFunclet) {
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
- }
- if (NeedsDwarfCFI) {
- // Mark effective beginning of when frame pointer becomes valid.
- // Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
- BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
+ if (NeedsDwarfCFI) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ // Define the current CFA to use the EBP/RBP register.
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister(
+ nullptr, DwarfFramePtr));
+ }
}
- // Mark the FramePtr as live-in in every block.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- I->addLiveIn(MachineFramePtr);
+ // Mark the FramePtr as live-in in every block. Don't do this again for
+ // funclet prologues.
+ if (!IsFunclet) {
+ for (MachineBasicBlock &EveryMBB : MF)
+ EveryMBB.addLiveIn(MachineFramePtr);
+ }
} else {
+ assert(!IsFunclet && "funclets without FPs not yet implemented");
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
+ // For EH funclets, only allocate enough space for outgoing calls. Save the
+ // NumBytes value that we would've used for the parent frame.
+ unsigned ParentFrameNumBytes = NumBytes;
+ if (IsFunclet)
+ NumBytes = getWinEHFuncletFrameSize(MF);
+
// Skip the callee-saved push instructions.
bool PushedRegs = false;
int StackOffset = 2 * stackGrowth;
while (MBBI != MBB.end() &&
+ MBBI->getFlag(MachineInstr::FrameSetup) &&
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
@@ -818,9 +1130,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Don't do this for Win64, it needs to realign the stack after the prologue.
- if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) {
+ if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
+ BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
}
// If there is an SUB32ri of ESP immediately before this instruction, merge
@@ -839,7 +1151,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
uint64_t AlignedNumBytes = NumBytes;
- if (IsWin64Prologue && TRI->needsStackRealignment(MF))
+ if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF))
AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
// Check whether EAX is livein for this function.
@@ -876,26 +1188,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
// We'll also use 4 already allocated bytes for EAX.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
- .setMIFlag(MachineInstr::FrameSetup);
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
}
- // Save a pointer to the MI where we set AX.
- MachineBasicBlock::iterator SetRAX = MBBI;
- --SetRAX;
-
// Call __chkstk, __chkstk_ms, or __alloca.
- emitStackProbeCall(MF, MBB, MBBI, DL);
-
- // Apply the frame setup flag to all inserted instrs.
- for (; SetRAX != MBBI; ++SetRAX)
- SetRAX->setFlag(MachineInstr::FrameSetup);
+ emitStackProbe(MF, MBB, MBBI, DL, true);
if (isEAXAlive) {
// Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
+ MachineInstr *MI =
+ addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
+ StackPtr, false, NumBytes - 4);
MI->setFlag(MachineInstr::FrameSetup);
MBB.insert(MBBI, MI);
}
@@ -909,19 +1213,72 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
int SEHFrameOffset = 0;
+ unsigned SPOrEstablisher;
+ if (IsFunclet) {
+ if (IsClrFunclet) {
+ // The establisher parameter passed to a CLR funclet is actually a pointer
+ // to the (mostly empty) frame of its nearest enclosing funclet; we have
+ // to find the root function establisher frame by loading the PSPSym from
+ // the intermediate frame.
+ unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
+ MachinePointerInfo NoInfo;
+ MBB.addLiveIn(Establisher);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
+ Establisher, false, PSPSlotOffset)
+ .addMemOperand(MF.getMachineMemOperand(
+ NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize));
+ ;
+ // Save the root establisher back into the current funclet's (mostly
+ // empty) frame, in case a sub-funclet or the GC needs it.
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
+ false, PSPSlotOffset)
+ .addReg(Establisher)
+ .addMemOperand(
+ MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore |
+ MachineMemOperand::MOVolatile,
+ SlotSize, SlotSize));
+ }
+ SPOrEstablisher = Establisher;
+ } else {
+ SPOrEstablisher = StackPtr;
+ }
+
if (IsWin64Prologue && HasFP) {
- SEHFrameOffset = calculateSetFPREG(NumBytes);
+ // Set RBP to a small fixed offset from RSP. In the funclet case, we base
+ // this calculation on the incoming establisher, which holds the value of
+ // RSP from the parent frame at the end of the prologue.
+ SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
if (SEHFrameOffset)
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
- StackPtr, false, SEHFrameOffset);
+ SPOrEstablisher, false, SEHFrameOffset);
else
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
+ .addReg(SPOrEstablisher);
- if (NeedsWinCFI)
+ // If this is not a funclet, emit the CFI describing our frame pointer.
+ if (NeedsWinCFI && !IsFunclet) {
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
.addImm(FramePtr)
.addImm(SEHFrameOffset)
.setMIFlag(MachineInstr::FrameSetup);
+ if (isAsynchronousEHPersonality(Personality))
+ MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
+ }
+ } else if (IsFunclet && STI.is32Bit()) {
+ // Reset EBP / ESI to something good for funclets.
+ MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
+ // If we're a catch funclet, we can be returned to via catchret. Save ESP
+ // into the registration node so that the runtime will restore it for us.
+ if (!MBB.isCleanupFuncletEntry()) {
+ assert(Personality == EHPersonality::MSVC_CXX);
+ unsigned FrameReg;
+ int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
+ int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg);
+ // ESP is the first field, so no extra displacement is needed.
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
+ false, EHRegOffset)
+ .addReg(X86::ESP);
+ }
}
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
@@ -932,7 +1289,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
int FI;
if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
if (X86::FR64RegClass.contains(Reg)) {
- int Offset = getFrameIndexOffset(MF, FI);
+ unsigned IgnoredFrameReg;
+ int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
Offset += SEHFrameOffset;
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
@@ -948,14 +1306,33 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
.setMIFlag(MachineInstr::FrameSetup);
+ if (FnHasClrFunclet && !IsFunclet) {
+ // Save the so-called Initial-SP (i.e. the value of the stack pointer
+ // immediately after the prolog) into the PSPSlot so that funclets
+ // and the GC can recover it.
+ unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
+ auto PSPInfo = MachinePointerInfo::getFixedStack(
+ MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
+ PSPSlotOffset)
+ .addReg(StackPtr)
+ .addMemOperand(MF.getMachineMemOperand(
+ PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
+ SlotSize, SlotSize));
+ }
+
// Realign stack after we spilled callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Win64 requires aligning the stack after the prologue.
if (IsWin64Prologue && TRI->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
- BuildStackAlignAND(MBB, MBBI, DL, MaxAlign);
+ BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
}
+ // We already dealt with stack realignment and funclets above.
+ if (IsFunclet && STI.is32Bit())
+ return;
+
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
@@ -964,7 +1341,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Update the base pointer with the current stack pointer.
unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
- .addReg(StackPtr)
+ .addReg(SPOrEstablisher)
.setMIFlag(MachineInstr::FrameSetup);
if (X86FI->getRestoreBasePointer()) {
// Stash value of base pointer. Saving RSP instead of EBP shortens
@@ -972,18 +1349,21 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
- .addReg(StackPtr)
+ .addReg(SPOrEstablisher)
.setMIFlag(MachineInstr::FrameSetup);
}
- if (X86FI->getHasSEHFramePtrSave()) {
+ if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
// Stash the value of the frame pointer relative to the base pointer for
// Win32 EH. This supports Win32 EH, which does the inverse of the above:
// it recovers the frame pointer from the base pointer rather than the
// other way around.
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
- addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true,
- getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex()))
+ unsigned UsedReg;
+ int Offset =
+ getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+ assert(UsedReg == BasePtr);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
.addReg(FramePtr)
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -1015,6 +1395,69 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue(
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}
+static bool isFuncletReturnInstr(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case X86::CATCHRET:
+ case X86::CLEANUPRET:
+ return true;
+ default:
+ return false;
+ }
+ llvm_unreachable("impossible");
+}
+
+// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
+// stack. It holds a pointer to the bottom of the root function frame. The
+// establisher frame pointer passed to a nested funclet may point to the
+// (mostly empty) frame of its parent funclet, but it will need to find
+// the frame of the root function to access locals. To facilitate this,
+// every funclet copies the pointer to the bottom of the root function
+// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
+// same offset for the PSPSym in the root function frame that's used in the
+// funclets' frames allows each funclet to dynamically accept any ancestor
+// frame as its establisher argument (the runtime doesn't guarantee the
+// immediate parent for some reason lost to history), and also allows the GC,
+// which uses the PSPSym for some bookkeeping, to find it in any funclet's
+// frame with only a single offset reported for the entire method.
+unsigned
+X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
+ const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
+ // getFrameIndexReferenceFromSP has an out ref parameter for the stack
+ // pointer register; pass a dummy that we ignore
+ unsigned SPReg;
+ int Offset = getFrameIndexReferenceFromSP(MF, Info.PSPSymFrameIdx, SPReg);
+ assert(Offset >= 0);
+ return static_cast<unsigned>(Offset);
+}
+
+unsigned
+X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
+ // This is the size of the pushed CSRs.
+ unsigned CSSize =
+ MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
+ // This is the amount of stack a funclet needs to allocate.
+ unsigned UsedSize;
+ EHPersonality Personality =
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn());
+ if (Personality == EHPersonality::CoreCLR) {
+ // CLR funclets need to hold enough space to include the PSPSym, at the
+ // same offset from the stack pointer (immediately after the prolog) as it
+ // resides at in the main function.
+ UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
+ } else {
+ // Other funclets just need enough stack for outgoing call arguments.
+ UsedSize = MF.getFrameInfo()->getMaxCallFrameSize();
+ }
+ // RBP is not included in the callee saved register block. After pushing RBP,
+ // everything is 16 byte aligned. Everything we allocate before an outgoing
+ // call must also be 16 byte aligned.
+ unsigned FrameSizeMinusRBP =
+ RoundUpToAlignment(CSSize + UsedSize, getStackAlignment());
+ // Subtract out the size of the callee saved registers. This is how much stack
+ // each funclet will allocate.
+ return FrameSizeMinusRBP - CSSize;
+}
+
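A standalone sketch of the arithmetic in getWinEHFuncletFrameSize with made-up sizes; it demonstrates only the RoundUpToAlignment(CSSize + UsedSize, 16) - CSSize computation, not the personality-specific choice of UsedSize:

    #include <cstdio>

    static unsigned roundUpToAlignment(unsigned Value, unsigned Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      const unsigned StackAlign = 16;
      unsigned CSSize = 24;   // three pushed 8-byte callee-saved registers
      unsigned UsedSize = 40; // outgoing call args (or PSPSym offset + slot)
      unsigned FrameSizeMinusRBP =
          roundUpToAlignment(CSSize + UsedSize, StackAlign);
      // 24 + 40 = 64 is already 16-byte aligned, so the funclet allocates 40.
      std::printf("funclet allocates %u bytes\n", FrameSizeMinusRBP - CSSize);
      return 0;
    }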
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1027,12 +1470,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
const bool Is64BitILP32 = STI.isTarget64BitILP32();
unsigned FramePtr = TRI->getFrameRegister(MF);
unsigned MachineFramePtr =
- Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
- : FramePtr;
+ Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinCFI =
IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry();
+ bool IsFunclet = isFuncletReturnInstr(MBBI);
+ MachineBasicBlock *TargetMBB = nullptr;
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI->getStackSize();
@@ -1040,7 +1484,27 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
- if (hasFP(MF)) {
+ if (MBBI->getOpcode() == X86::CATCHRET) {
+ // SEH shouldn't use catchret.
+ assert(!isAsynchronousEHPersonality(
+ classifyEHPersonality(MF.getFunction()->getPersonalityFn())) &&
+ "SEH should not use CATCHRET");
+
+ NumBytes = getWinEHFuncletFrameSize(MF);
+ assert(hasFP(MF) && "EH funclets without FP not yet implemented");
+ TargetMBB = MBBI->getOperand(0).getMBB();
+
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+ MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else if (MBBI->getOpcode() == X86::CLEANUPRET) {
+ NumBytes = getWinEHFuncletFrameSize(MF);
+ assert(hasFP(MF) && "EH funclets without FP not yet implemented");
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+ MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
NumBytes = FrameSize - CSSize;
@@ -1052,7 +1516,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Pop EBP.
BuildMI(MBB, MBBI, DL,
- TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
+ TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
} else {
NumBytes = StackSize - CSSize;
}
@@ -1063,26 +1528,50 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock::iterator PI = std::prev(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->isTerminator())
+ if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+ (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+ Opc != X86::DBG_VALUE && !PI->isTerminator())
break;
--MBBI;
}
MachineBasicBlock::iterator FirstCSPop = MBBI;
+ if (TargetMBB) {
+ // Fill EAX/RAX with the address of the target block.
+ unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX;
+ if (STI.is64Bit()) {
+ // LEA64r TargetMBB(%rip), %rax
+ BuildMI(MBB, FirstCSPop, DL, TII.get(X86::LEA64r), ReturnReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(TargetMBB)
+ .addReg(0);
+ } else {
+ // MOV32ri $TargetMBB, %eax
+ BuildMI(MBB, FirstCSPop, DL, TII.get(X86::MOV32ri), ReturnReg)
+ .addMBB(TargetMBB);
+ }
+ // Record that we've taken the address of TargetMBB and no longer just
+ // reference it in a terminator.
+ TargetMBB->setHasAddressTaken();
+ }
+
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
if (NumBytes || MFI->hasVarSizedObjects())
- mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
+ NumBytes += mergeSPUpdates(MBB, MBBI, true);
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
- // realigned.
- if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
+ // realigned. Don't do this if this is a funclet epilogue, since the funclets
+ // will not do realignment or dynamic stack allocation.
+ if ((TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) &&
+ !IsFunclet) {
if (TRI->needsStackRealignment(MF))
MBBI = FirstCSPop;
unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
@@ -1134,9 +1623,24 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
+// NOTE: this only has a subset of the full frame index logic. In
+// particular, the FI < 0 and AfterFPPop logic is handled in
+// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly
+// (probably?) it should be moved into here.
+int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // We can't calculate offset from frame pointer if the stack is realigned,
+ // so enforce usage of stack/base pointer. The base pointer is used when we
+ // have dynamic allocas in addition to dynamic realignment.
+ if (TRI->hasBasePointer(MF))
+ FrameReg = TRI->getBaseRegister();
+ else if (TRI->needsStackRealignment(MF))
+ FrameReg = TRI->getStackRegister();
+ else
+ FrameReg = TRI->getFrameRegister(MF);
+
// Offset will hold the offset from the stack pointer at function entry to the
// object.
// We need to factor in additional offsets applied during the prologue to the
@@ -1207,48 +1711,62 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
return Offset + FPDelta;
}
-int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const {
- // We can't calculate offset from frame pointer if the stack is realigned,
- // so enforce usage of stack/base pointer. The base pointer is used when we
- // have dynamic allocas in addition to dynamic realignment.
- if (TRI->hasBasePointer(MF))
- FrameReg = TRI->getBaseRegister();
- else if (TRI->needsStackRealignment(MF))
- FrameReg = TRI->getStackRegister();
- else
- FrameReg = TRI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
-}
-
-// Simplified from getFrameIndexOffset keeping only StackPointer cases
-int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
+// Simplified from getFrameIndexReference keeping only StackPointer cases
+int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Does not include any dynamic realign.
const uint64_t StackSize = MFI->getStackSize();
{
#ifndef NDEBUG
- // Note: LLVM arranges the stack as:
- // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
- // > "Stack Slots" (<--SP)
- // We can always address StackSlots from RSP. We can usually (unless
- // needsStackRealignment) address CSRs from RSP, but sometimes need to
- // address them from RBP. FixedObjects can be placed anywhere in the stack
- // frame depending on their specific requirements (i.e. we can actually
- // refer to arguments to the function which are stored in the *callers*
- // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
- // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
-
- assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
-
- // We don't handle tail calls, and shouldn't be seeing them
- // either.
+ // LLVM arranges the stack as follows:
+ // ...
+ // ARG2
+ // ARG1
+ // RETADDR
+ // PUSH RBP <-- RBP points here
+ // PUSH CSRs
+ // ~~~~~~~ <-- possible stack realignment (non-win64)
+ // ...
+ // STACK OBJECTS
+ // ... <-- RSP after prologue points here
+ // ~~~~~~~ <-- possible stack realignment (win64)
+ //
+ // if (hasVarSizedObjects()):
+ // ... <-- "base pointer" (ESI/RBX) points here
+ // DYNAMIC ALLOCAS
+ // ... <-- RSP points here
+ //
+ // Case 1: In the simple case of no stack realignment and no dynamic
+ // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
+ // with fixed offsets from RSP.
+ //
+ // Case 2: In the case of stack realignment with no dynamic allocas, fixed
+ // stack objects are addressed with RBP and regular stack objects with RSP.
+ //
+ // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
+ // to address stack arguments for outgoing calls and nothing else. The "base
+ // pointer" points to local variables, and RBP points to fixed objects.
+ //
+ // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
+ // answer we give is relative to the SP after the prologue, and not the
+ // SP in the middle of the function.
+
+ assert((!MFI->isFixedObjectIndex(FI) || !TRI->needsStackRealignment(MF) ||
+ STI.isTargetWin64()) &&
+ "offset from fixed object to SP is not static");
+
+ // We don't handle tail calls, and shouldn't be seeing them either.
int TailCallReturnAddrDelta =
MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
#endif
}
+ // Fill in FrameReg output argument.
+ FrameReg = TRI->getStackRegister();
+
// This is how the math works out:
//
// %rsp grows (i.e. gets lower) left to right. Each box below is
@@ -1280,15 +1798,6 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F
return Offset + StackSize;
}
-// Simplified from getFrameIndexReference keeping only StackPointer cases
-int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg) const {
- assert(!TRI->hasBasePointer(MF) && "we don't handle this case");
-
- FrameReg = TRI->getStackRegister();
- return getFrameIndexOffsetFromSP(MF, FI);
-}
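With made-up numbers, the simple case that "return Offset + StackSize" expresses (no realignment, no dynamic allocas) works out as follows: an object sitting ObjectOffset bytes below the stack pointer at function entry ends up ObjectOffset + StackSize bytes above the post-prologue RSP.

    #include <cstdio>

    int main() {
      const long StackSize = 64;     // bytes the prologue subtracts from RSP
      const long ObjectOffset = -24; // frame-info offset: 24 bytes below entry SP
      // The post-prologue RSP is StackSize bytes below the entry SP, so the
      // object is reachable at a positive offset from RSP.
      std::printf("addressable as %ld(%%rsp)\n", ObjectOffset + StackSize); // 40
      return 0;
    }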
bool X86FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
@@ -1358,6 +1867,11 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(MI);
+ // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
+ // for us, and there are no XMM CSRs on Win32.
+ if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
+ return true;
+
// Push GPRs. It increases frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -1399,6 +1913,22 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (CSI.empty())
return false;
+ if (isFuncletReturnInstr(MI) && STI.isOSWindows()) {
+ // Don't restore CSRs in 32-bit EH funclets. Matches
+ // spillCalleeSavedRegisters.
+ if (STI.is32Bit())
+ return true;
+ // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
+ // funclets. emitEpilogue transforms these to normal jumps.
+ if (MI->getOpcode() == X86::CATCHRET) {
+ const Function *Func = MBB.getParent()->getFunction();
+ bool IsSEH = isAsynchronousEHPersonality(
+ classifyEHPersonality(Func->getPersonalityFn()));
+ if (IsSEH)
+ return true;
+ }
+ }
+
DebugLoc DL = MBB.findDebugLoc(MI);
// Reload XMMs from stack frame.
@@ -1420,7 +1950,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
!X86::GR32RegClass.contains(Reg))
continue;
- BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
+ .setMIFlag(MachineInstr::FrameDestroy);
}
return true;
}
@@ -1450,8 +1981,16 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// Spill the BasePtr if it's used.
- if (TRI->hasBasePointer(MF))
+ if (TRI->hasBasePointer(MF)) {
SavedRegs.set(TRI->getBaseRegister());
+
+ // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
+ if (MF.getMMI().hasEHFunclets()) {
+ int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize);
+ X86FI->setHasSEHFramePtrSave(true);
+ X86FI->setSEHFramePtrSaveIndex(FI);
+ }
+ }
}
static bool
@@ -1545,11 +2084,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// The MOV R10, RAX needs to be in a different block, since the RET we emit in
// allocMBB needs to be last (terminating) instruction.
- for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
- e = PrologueMBB.livein_end();
- i != e; i++) {
- allocMBB->addLiveIn(*i);
- checkMBB->addLiveIn(*i);
+ for (const auto &LI : PrologueMBB.liveins()) {
+ allocMBB->addLiveIn(LI);
+ checkMBB->addLiveIn(LI);
}
if (IsNested)
@@ -1682,8 +2219,6 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addImm(StackSize);
BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
- MF.getRegInfo().setPhysRegUsed(Reg10);
- MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
@@ -1821,11 +2356,9 @@ void X86FrameLowering::adjustForHiPEPrologue(
MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
- for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(),
- E = PrologueMBB.livein_end();
- I != E; I++) {
- stackCheckMBB->addLiveIn(*I);
- incStackMBB->addLiveIn(*I);
+ for (const auto &LI : PrologueMBB.liveins()) {
+ stackCheckMBB->addLiveIn(LI);
+ incStackMBB->addLiveIn(LI);
}
MF.push_front(incStackMBB);
@@ -1870,16 +2403,84 @@ void X86FrameLowering::adjustForHiPEPrologue(
.addReg(ScratchReg), PReg, false, SPLimitOffset);
BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
- stackCheckMBB->addSuccessor(&PrologueMBB, 99);
- stackCheckMBB->addSuccessor(incStackMBB, 1);
- incStackMBB->addSuccessor(&PrologueMBB, 99);
- incStackMBB->addSuccessor(incStackMBB, 1);
+ stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
+ stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
+ incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
+ incStackMBB->addSuccessor(incStackMBB, {1, 100});
}
#ifdef XDEBUG
MF.verify();
#endif
}
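The successor-probability change above swaps bare weights for explicit numerator/denominator pairs. A trivial standalone stand-in type (not LLVM's BranchProbability class) shows the intended reading of {99, 100} and {1, 100}:

    #include <cstdio>

    struct BranchProb {
      unsigned Num, Den;
      double asFloat() const { return double(Num) / Den; }
    };

    int main() {
      BranchProb ToPrologue{99, 100}, ToIncStack{1, 100};
      std::printf("fall through to prologue: %.2f, grow stack: %.2f\n",
                  ToPrologue.asFloat(), ToIncStack.asFloat()); // 0.99, 0.01
      return 0;
    }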
+bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const {
+
+ if (Offset <= 0)
+ return false;
+
+ if (Offset % SlotSize)
+ return false;
+
+ int NumPops = Offset / SlotSize;
+ // This is only worth it if we have at most 2 pops.
+ if (NumPops != 1 && NumPops != 2)
+ return false;
+
+ // Handle only the trivial case where the adjustment directly follows
+ // a call. This is the most common one, anyway.
+ if (MBBI == MBB.begin())
+ return false;
+ MachineBasicBlock::iterator Prev = std::prev(MBBI);
+ if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
+ return false;
+
+ unsigned Regs[2];
+ unsigned FoundRegs = 0;
+
+ auto RegMask = Prev->getOperand(1);
+
+ auto &RegClass =
+ Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
+ // Try to find up to NumPops free registers.
+ for (auto Candidate : RegClass) {
+
+ // Poor man's liveness:
+ // Since we're immediately after a call, any register that is clobbered
+ // by the call and not defined by it can be considered dead.
+ if (!RegMask.clobbersPhysReg(Candidate))
+ continue;
+
+ bool IsDef = false;
+ for (const MachineOperand &MO : Prev->implicit_operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) {
+ IsDef = true;
+ break;
+ }
+ }
+
+ if (IsDef)
+ continue;
+
+ Regs[FoundRegs++] = Candidate;
+ if (FoundRegs == (unsigned)NumPops)
+ break;
+ }
+
+ if (FoundRegs == 0)
+ return false;
+
+ // If we found only one free register, but need two, reuse the same one twice.
+ while (FoundRegs < (unsigned)NumPops)
+ Regs[FoundRegs++] = Regs[0];
+
+ for (int i = 0; i < NumPops; ++i)
+ BuildMI(MBB, MBBI, DL,
+ TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
+
+ return true;
+}
+
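A standalone sketch of the register-selection logic in adjustStackWithPops, with strings standing in for physical registers and explicit sets standing in for the call's regmask and implicit defs; only the "poor man's liveness" idea is modelled:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    static std::vector<std::string>
    pickPopRegs(int Offset, int SlotSize, const std::set<std::string> &Clobbered,
                const std::set<std::string> &DefinedByCall) {
      if (Offset <= 0 || Offset % SlotSize)
        return {};
      int NumPops = Offset / SlotSize;
      if (NumPops != 1 && NumPops != 2)
        return {}; // Only worth it for one or two pops.
      std::vector<std::string> Regs;
      const char *Candidates[] = {"ecx", "edx", "esi", "edi"};
      for (const char *Candidate : Candidates) {
        // Right after a call, a register clobbered by the call but not defined
        // by it can be treated as dead.
        if (!Clobbered.count(Candidate) || DefinedByCall.count(Candidate))
          continue;
        Regs.push_back(Candidate);
        if ((int)Regs.size() == NumPops)
          break;
      }
      if (Regs.empty())
        return {};
      while ((int)Regs.size() < NumPops)
        Regs.push_back(Regs[0]); // Reuse the single dead register twice.
      return Regs;
    }

    int main() {
      for (const std::string &R : pickPopRegs(8, 4, {"ecx", "edx"}, {"edx"}))
        std::printf("pop %%%s\n", R.c_str()); // pop %ecx, pop %ecx
      return 0;
    }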
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -1895,8 +2496,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
// adjcallstackdown instruction into 'add ESP, <amt>'
- if (Amount == 0)
- return;
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -1904,15 +2503,68 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned StackAlign = getStackAlignment();
Amount = RoundUpToAlignment(Amount, StackAlign);
+ MachineModuleInfo &MMI = MF.getMMI();
+ const Function *Fn = MF.getFunction();
+ bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool DwarfCFI = !WindowsCFI &&
+ (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+
+ // If we have any exception handlers in this function, and we adjust
+ // the SP before calls, we may need to indicate this to the unwinder
+ // using GNU_ARGS_SIZE. Note that this may be necessary even when
+ // Amount == 0, because the preceding function may have set a non-0
+ // GNU_ARGS_SIZE.
+ // TODO: We don't need to reset this between subsequent functions,
+ // if it didn't change.
+ bool HasDwarfEHHandlers = !WindowsCFI &&
+ !MF.getMMI().getLandingPads().empty();
+
+ if (HasDwarfEHHandlers && !isDestroy &&
+ MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
+ BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
+
+ if (Amount == 0)
+ return;
+
// Factor out the amount that gets handled inside the sequence
// (Pushes of argument for frame setup, callee pops for frame destroy)
Amount -= InternalAmt;
+ // TODO: This is needed only if we require precise CFA.
+ // If this is a callee-pop calling convention, emit a CFA adjust for
+ // the amount the callee popped.
+ if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
+ BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
+
if (Amount) {
// Add Amount to SP to destroy a frame, and subtract to setup.
int Offset = isDestroy ? Amount : -Amount;
- BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+
+ if (!(Fn->optForMinSize() &&
+ adjustStackWithPops(MBB, I, DL, Offset)))
+ BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+ }
+
+ if (DwarfCFI && !hasFP(MF)) {
+ // If we don't have FP, but need to generate unwind information,
+ // we need to set the correct CFA offset after the stack adjustment.
+ // How much we adjust the CFA offset depends on whether we're emitting
+ // CFI only for EH purposes or for debugging. EH only requires the CFA
+ // offset to be correct at each call site, while for debugging we want
+ // it to be more precise.
+ int CFAOffset = Amount;
+ // TODO: When not using precise CFA, we also need to adjust for the
+ // InternalAmt here.
+
+ if (CFAOffset) {
+ CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
+ BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+ }
}
+
return;
}
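A small standalone illustration of the stack-alignment rounding and the CFA-offset sign convention used in the hunk above; RoundUpToAlignment is re-implemented locally so the example compiles on its own, and the byte counts are made-up values.

    #include <cassert>
    #include <cstdint>

    // Local stand-in for llvm::RoundUpToAlignment, just for this sketch.
    static uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      // 20 bytes of outgoing arguments, 16-byte stack alignment.
      uint64_t Amount = RoundUpToAlignment(20, 16);
      assert(Amount == 32);

      // Destroying the call frame moves SP up by Amount, so the recorded
      // CFA offset is adjusted by -Amount; setting it up uses +Amount.
      bool isDestroy = true;
      int64_t CFAOffset = isDestroy ? -(int64_t)Amount : (int64_t)Amount;
      assert(CFAOffset == -32);
    }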
@@ -1933,12 +2585,136 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
assert(MBB.getParent() && "Block is not attached to a function!");
+ // Win64 has strict requirements in terms of epilogues, and we are
+ // not taking any chances messing with them.
+ // I.e., unless this block is already an exit block, we can't use
+ // it as an epilogue.
+ if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
+ return false;
+
if (canUseLEAForSPInEpilogue(*MBB.getParent()))
return true;
// If we cannot use LEA to adjust SP, we may need to use ADD, which
- // clobbers the EFLAGS. Check that none of the terminators reads the
- // EFLAGS, and if one uses it, conservatively assume this is not
+ // clobbers the EFLAGS. Check that we do not need to preserve it;
+ // otherwise, conservatively assume this is not
// safe to insert the epilogue here.
- return !terminatorsNeedFlagsAsInput(MBB);
+ return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
+}
+
+bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ // If we may need to emit frameless compact unwind information, give
+ // up as this is currently broken: PR25614.
+ return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF);
+}
+
+MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool RestoreSP) const {
+ assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
+ assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
+ assert(STI.is32Bit() && !Uses64BitFramePtr &&
+ "restoring EBP/ESI on non-32-bit target");
+
+ MachineFunction &MF = *MBB.getParent();
+ unsigned FramePtr = TRI->getFrameRegister(MF);
+ unsigned BasePtr = TRI->getBaseRegister();
+ WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // FIXME: Don't set FrameSetup flag in catchret case.
+
+ int FI = FuncInfo.EHRegNodeFrameIndex;
+ int EHRegSize = MFI->getObjectSize(FI);
+
+ if (RestoreSP) {
+ // MOV32rm -EHRegSize(%ebp), %esp
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
+ X86::EBP, true, -EHRegSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ unsigned UsedReg;
+ int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg);
+ int EndOffset = -EHRegOffset - EHRegSize;
+ FuncInfo.EHRegNodeEndOffset = EndOffset;
+
+ if (UsedReg == FramePtr) {
+ // ADD $offset, %ebp
+ unsigned ADDri = getADDriOpcode(false, EndOffset);
+ BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
+ .addReg(FramePtr)
+ .addImm(EndOffset)
+ .setMIFlag(MachineInstr::FrameSetup)
+ ->getOperand(3)
+ .setIsDead();
+ assert(EndOffset >= 0 &&
+ "end of registration object above normal EBP position!");
+ } else if (UsedReg == BasePtr) {
+ // LEA offset(%ebp), %esi
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
+ FramePtr, false, EndOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ // MOV32rm SavedEBPOffset(%esi), %ebp
+ assert(X86FI->getHasSEHFramePtrSave());
+ int Offset =
+ getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg);
+ assert(UsedReg == BasePtr);
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
+ UsedReg, true, Offset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
+ }
+ return MBBI;
+}
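The EndOffset arithmetic in restoreWin32EHStackPointers can be checked with a toy calculation; the frame-index offset and object size below are invented example values, not numbers from the patch.

    #include <cassert>

    int main() {
      int EHRegOffset = -24; // offset of the EH registration node (example value)
      int EHRegSize = 12;    // size of the registration object (example value)
      int EndOffset = -EHRegOffset - EHRegSize;
      assert(EndOffset == 12);
      assert(EndOffset >= 0); // the object must end below the saved EBP
    }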
+
+unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
+ // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
+ unsigned Offset = 16;
+ // RBP is immediately pushed.
+ Offset += SlotSize;
+ // All callee-saved registers are then pushed.
+ Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
+ // Every funclet allocates enough stack space for the largest outgoing call.
+ Offset += getWinEHFuncletFrameSize(MF);
+ return Offset;
+}
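Since getWinEHParentFrameOffset is pure arithmetic, a quick standalone sketch with made-up callee-saved and funclet sizes shows how the pieces add up.

    #include <cassert>

    int main() {
      unsigned SlotSize = 8;              // x86-64 slot size
      unsigned CalleeSavedFrameSize = 16; // e.g. two extra saved GPRs (example)
      unsigned FuncletFrameSize = 32;     // largest outgoing call area (example)

      unsigned Offset = 16;               // RDX homed at 16(%rsp)
      Offset += SlotSize;                 // RBP is pushed immediately
      Offset += CalleeSavedFrameSize;     // remaining callee-saved pushes
      Offset += FuncletFrameSize;         // funclet call-argument area
      assert(Offset == 72);
    }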
+
+void X86FrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
+ // If this function isn't doing Win64-style C++ EH, we don't need to do
+ // anything.
+ const Function *Fn = MF.getFunction();
+ if (!STI.is64Bit() || !MF.getMMI().hasEHFunclets() ||
+ classifyEHPersonality(Fn->getPersonalityFn()) != EHPersonality::MSVC_CXX)
+ return;
+
+ // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
+ // relative to RSP after the prologue. Find the offset of the last fixed
+ // object, so that we can allocate a slot immediately following it. If there
+ // were no fixed objects, use offset -SlotSize, which is immediately after the
+ // return address. Fixed objects have negative frame indices.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int64_t MinFixedObjOffset = -SlotSize;
+ for (int I = MFI->getObjectIndexBegin(); I < 0; ++I)
+ MinFixedObjOffset = std::min(MinFixedObjOffset, MFI->getObjectOffset(I));
+
+ int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
+ int UnwindHelpFI =
+ MFI->CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false);
+ MF.getWinEHFuncInfo()->UnwindHelpFrameIdx = UnwindHelpFI;
+
+ // Store -2 into UnwindHelp on function entry. We have to scan forwards past
+ // other frame setup instructions.
+ MachineBasicBlock &MBB = MF.front();
+ auto MBBI = MBB.begin();
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+ addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
+ UnwindHelpFI)
+ .addImm(-2);
}
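A self-contained sketch of the UnwindHelp placement logic above; the fixed-object offsets stand in for what MachineFrameInfo would report and are purely illustrative.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      const int64_t SlotSize = 8;
      // Offsets of the existing fixed objects (example values).
      std::vector<int64_t> FixedObjOffsets = {-16, -32};

      int64_t MinFixedObjOffset = -SlotSize; // default: just past the return address
      for (int64_t Off : FixedObjOffsets)
        MinFixedObjOffset = std::min(MinFixedObjOffset, Off);

      // UnwindHelp goes in the next slot below the lowest fixed object.
      int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
      assert(UnwindHelpOffset == -40);
    }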
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
index 495cfcd..3ab41b4 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -47,11 +47,17 @@ public:
unsigned StackPtr;
- /// Emit a call to the target's stack probe function. This is required for all
+ /// Emit target stack probe code. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
- /// the number of bytes to probe in RAX/EAX.
- void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL) const;
+ /// the number of bytes to probe in RAX/EAX. Returns the instruction just
+ /// after the expansion.
+ MachineInstr *emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ bool InProlog) const;
+
+ /// Replace a StackProbe inline-stub with the actual probe code inline.
+ void inlineStackProbe(MachineFunction &MF,
+ MachineBasicBlock &PrologMBB) const override;
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -91,11 +97,9 @@ public:
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override;
bool needsFrameIndexResolution(const MachineFunction &MF) const override;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
- int getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const;
int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
@@ -103,6 +107,11 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const override;
+
/// Check the instruction before/after the passed instruction. If
/// it is an ADD/SUB/LEA instruction, it is deleted, and the
/// stack adjustment is returned as a positive value for ADD/LEA and
@@ -125,7 +134,9 @@ public:
/// \p MBB will be correctly handled by the target.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
-private:
+ /// Returns true if the target will correctly handle shrink wrapping.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
/// convertArgMovsToPushes - This method tries to convert a call sequence
/// that uses sub and mov instructions to put the argument onto the stack
/// into a series of pushes.
@@ -135,22 +146,56 @@ private:
MachineBasicBlock::iterator I,
uint64_t Amount) const;
- uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
-
/// Wraps up getting a CFI index and building a MachineInstr for it.
void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, MCCFIInstruction CFIInst) const;
+ /// Sets up EBP and optionally ESI based on the incoming EBP value. Only
+ /// needed for 32-bit. Used in funclet prologues and at catchret destinations.
+ MachineBasicBlock::iterator
+ restoreWin32EHStackPointers(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ bool RestoreSP = false) const;
+
+private:
+ uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
+
+ /// Emit target stack probe as a call to a helper function
+ MachineInstr *emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool InProlog) const;
+
+ /// Emit target stack probe as an inline sequence.
+ MachineInstr *emitStackProbeInline(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool InProlog) const;
+
+ /// Emit a stub to later inline the target stack probe.
+ MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, bool InProlog) const;
+
/// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
- uint64_t MaxAlign) const;
+ unsigned Reg, uint64_t MaxAlign) const;
+
+ /// Make small positive stack adjustments using POPs.
+ bool adjustStackWithPops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ int Offset) const;
/// Adjusts the stack pointer using LEA, SUB, or ADD.
MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL, int64_t Offset,
bool InEpilogue) const;
+
+ unsigned getPSPSlotOffsetFromSP(const MachineFunction &MF) const;
+
+ unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d5351d2..4414e47 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -46,9 +46,8 @@ STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
//===----------------------------------------------------------------------===//
namespace {
- /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
- /// SDValue's instead of register numbers for the leaves of the matched
- /// tree.
+ /// This corresponds to X86AddressMode, but uses SDValue's instead of register
+ /// numbers for the leaves of the matched tree.
struct X86ISelAddressMode {
enum {
RegBase,
@@ -87,8 +86,7 @@ namespace {
IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
}
- /// isRIPRelative - Return true if this addressing mode is already RIP
- /// relative.
+ /// Return true if this addressing mode is already RIP-relative.
bool isRIPRelative() const {
if (BaseType != RegBase) return false;
if (RegisterSDNode *RegNode =
@@ -147,21 +145,25 @@ namespace {
namespace {
//===--------------------------------------------------------------------===//
- /// ISel - X86 specific code to select X86 machine instructions for
+ /// ISel - X86-specific code to select X86 machine instructions for
/// SelectionDAG operations.
///
class X86DAGToDAGISel final : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// OptForSize - If true, selector should try to optimize for code size
- /// instead of performance.
+ /// If true, selector should try to optimize for code size instead of
+ /// performance.
bool OptForSize;
+ /// If true, selector should try to optimize for minimum code size.
+ bool OptForMinSize;
+
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
+ : SelectionDAGISel(tm, OptLevel), OptForSize(false),
+ OptForMinSize(false) {}
const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -184,8 +186,7 @@ namespace {
return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
}
- // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // sign extended field.
+ // True if the 64-bit immediate fits in a 32-bit sign-extended field.
inline bool i64immSExt32(SDNode *N) const {
uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
return (int64_t)v == (int32_t)v;
@@ -196,50 +197,50 @@ namespace {
private:
SDNode *Select(SDNode *N) override;
- SDNode *SelectGather(SDNode *N, unsigned Opc);
- SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
-
- bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
- bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
- bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
- bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
- bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ SDNode *selectGather(SDNode *N, unsigned Opc);
+ SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT);
+
+ bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+ bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+ bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
+ bool matchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
+ bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
- bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
- bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
+ bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+ bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
- bool SelectLEAAddr(SDValue N, SDValue &Base,
+ bool selectMOV64Imm32(SDValue N, SDValue &Imm);
+ bool selectLEAAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
+ bool selectLEA64_32Addr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+ bool selectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
- bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ bool selectScalarSSELoad(SDNode *Root, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
- bool TryFoldLoad(SDNode *P, SDValue N,
+ bool tryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
+ /// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- void EmitSpecialCodeForMain();
+ void emitSpecialCodeForMain();
inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL,
SDValue &Base, SDValue &Scale,
@@ -252,7 +253,7 @@ namespace {
: AM.Base_Reg;
Scale = getI8Imm(AM.Scale, DL);
Index = AM.IndexReg;
- // These are 32-bit even in 64-bit mode since RIP relative offset
+ // These are 32-bit even in 64-bit mode since RIP-relative offset
// is 32-bit.
if (AM.GV)
Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
@@ -283,32 +284,105 @@ namespace {
Segment = CurDAG->getRegister(0, MVT::i32);
}
- /// getI8Imm - Return a target constant with the specified value, of type
- /// i8.
+ // Utility function to determine whether we should avoid selecting
+ // immediate forms of instructions for better code size.
+ // At a high level, we'd like to avoid such instructions when
+ // we have similar constants used within the same basic block
+ // that can be kept in a register.
+ //
+ bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
+ uint32_t UseCount = 0;
+
+ // Do not want to hoist if we're not optimizing for size.
+ // TODO: We'd like to remove this restriction.
+ // See the comment in X86InstrInfo.td for more info.
+ if (!OptForSize)
+ return false;
+
+ // Walk all the users of the immediate.
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
+
+ SDNode *User = *UI;
+
+ // This user is already selected. Count it as a legitimate use and
+ // move on.
+ if (User->isMachineOpcode()) {
+ UseCount++;
+ continue;
+ }
+
+ // We want to count stores of immediates as real uses.
+ if (User->getOpcode() == ISD::STORE &&
+ User->getOperand(1).getNode() == N) {
+ UseCount++;
+ continue;
+ }
+
+ // We don't currently match users that have > 2 operands (except
+ // for stores, which are handled above).
+ // Those instructions won't match in ISel, for now, and would
+ // be counted incorrectly.
+ // This may change in the future as we add additional instruction
+ // types.
+ if (User->getNumOperands() != 2)
+ continue;
+
+ // Immediates that are used for offsets as part of stack
+ // manipulation should be left alone. These are typically
+ // used to indicate SP offsets for argument passing and
+ // will get pulled into stores/pushes (implicitly).
+ if (User->getOpcode() == X86ISD::ADD ||
+ User->getOpcode() == ISD::ADD ||
+ User->getOpcode() == X86ISD::SUB ||
+ User->getOpcode() == ISD::SUB) {
+
+ // Find the other operand of the add/sub.
+ SDValue OtherOp = User->getOperand(0);
+ if (OtherOp.getNode() == N)
+ OtherOp = User->getOperand(1);
+
+ // Don't count if the other operand is SP.
+ RegisterSDNode *RegNode;
+ if (OtherOp->getOpcode() == ISD::CopyFromReg &&
+ (RegNode = dyn_cast_or_null<RegisterSDNode>(
+ OtherOp->getOperand(1).getNode())))
+ if ((RegNode->getReg() == X86::ESP) ||
+ (RegNode->getReg() == X86::RSP))
+ continue;
+ }
+
+ // ... otherwise, count this and move on.
+ UseCount++;
+ }
+
+ // If we have more than 1 use, then recommend for hoisting.
+ return (UseCount > 1);
+ }
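To make the use-counting heuristic above easier to follow, here is a simplified, self-contained model of it; the Use struct and shouldHoistImmediate are inventions for this sketch, not APIs from the patch.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Use {
      bool AlreadySelected; // user is already a selected machine node
      bool IsStore;         // user stores the immediate
      unsigned NumOperands; // operand count of the user
      bool OtherOpIsSP;     // for 2-operand add/sub users: other operand is ESP/RSP
    };

    static bool shouldHoistImmediate(bool OptForSize, const std::vector<Use> &Uses) {
      if (!OptForSize)
        return false;
      uint32_t UseCount = 0;
      for (const Use &U : Uses) {
        if (UseCount >= 2)
          break;
        if (U.AlreadySelected || U.IsStore) { ++UseCount; continue; }
        if (U.NumOperands != 2)
          continue;          // not matchable yet; don't count it
        if (U.OtherOpIsSP)
          continue;          // SP-adjusting offsets keep their immediate form
        ++UseCount;
      }
      return UseCount > 1;   // two or more countable uses: hoist to a register
    }

    int main() {
      // Two ordinary two-operand users of the same constant: hoisting wins.
      assert(shouldHoistImmediate(true, {{false, false, 2, false},
                                         {false, false, 2, false}}));
      // One countable use plus an SP adjustment: keep the immediate forms.
      assert(!shouldHoistImmediate(true, {{false, false, 2, true},
                                          {false, false, 2, false}}));
    }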
+
+ /// Return a target constant with the specified value of type i8.
inline SDValue getI8Imm(unsigned Imm, SDLoc DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
}
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
+ /// Return a target constant with the specified value, of type i32.
inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
}
- /// getGlobalBaseReg - Return an SDNode that returns the value of
- /// the global base register. Output instructions required to
- /// initialize the global base register, if necessary.
- ///
+ /// Return an SDNode that returns the value of the global base register.
+ /// Output instructions required to initialize the global base register,
+ /// if necessary.
SDNode *getGlobalBaseReg();
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
+ /// Return a reference to the TargetMachine, casted to the target-specific
+ /// type.
const X86TargetMachine &getTargetMachine() const {
return static_cast<const X86TargetMachine &>(TM);
}
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
+ /// Return a reference to the TargetInstrInfo, casted to the target-specific
+ /// type.
const X86InstrInfo *getInstrInfo() const {
return Subtarget->getInstrInfo();
}
@@ -386,9 +460,9 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
return true;
}
-/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
+/// Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
-static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
SDValue Call, SDValue OrigChain) {
SmallVector<SDValue, 8> Ops;
SDValue Chain = OrigChain.getOperand(0);
@@ -418,7 +492,7 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}
-/// isCalleeLoad - Return true if call address is a load and it can be
+/// Return true if the call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
@@ -461,12 +535,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
- // OptForSize is used in pattern predicates that isel is matching.
- OptForSize = MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ // OptFor[Min]Size are used in pattern predicates that isel is matching.
+ OptForSize = MF->getFunction()->optForSize();
+ OptForMinSize = MF->getFunction()->optForMinSize();
+ assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+ SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target doesn't favor register indirect
@@ -500,7 +576,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
SDValue Load = N->getOperand(1);
if (!isCalleeLoad(Load, Chain, HasCallSeq))
continue;
- MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
+ moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
continue;
}
@@ -577,9 +653,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
}
-/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
-/// the main function.
-void X86DAGToDAGISel::EmitSpecialCodeForMain() {
+/// Emit any code that needs to be executed only in the main function.
+void X86DAGToDAGISel::emitSpecialCodeForMain() {
if (Subtarget->isTargetCygMing()) {
TargetLowering::ArgListTy Args;
auto &DL = CurDAG->getDataLayout();
@@ -599,7 +674,7 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
// If this is main, emit special code for main.
if (const Function *Fn = MF->getFunction())
if (Fn->hasExternalLinkage() && Fn->getName() == "main")
- EmitSpecialCodeForMain();
+ emitSpecialCodeForMain();
}
static bool isDispSafeForFrameIndex(int64_t Val) {
@@ -612,7 +687,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) {
return isInt<31>(Val);
}
-bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
X86ISelAddressMode &AM) {
// Cannot combine ExternalSymbol displacements with integer offsets.
if (Offset != 0 && (AM.ES || AM.MCSym))
@@ -634,7 +709,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
}
-bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
+bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
// load gs:0 -> GS segment register.
@@ -658,11 +733,10 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
return true;
}
-/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
-/// into an addressing mode. These wrap things that will resolve down into a
-/// symbol reference. If no match is possible, this returns true, otherwise it
-/// returns false.
-bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
+/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
+/// mode. These wrap things that will resolve down into a symbol reference.
+/// If no match is possible, this returns true; otherwise it returns false.
+bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
// If the addressing mode already has a symbol as the displacement, we can
// never match another symbol.
if (AM.hasSymbolicDisplacement())
@@ -685,7 +759,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
AM.SymbolFlags = G->getTargetFlags();
- if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ if (foldOffsetIntoAddress(G->getOffset(), AM)) {
AM = Backup;
return true;
}
@@ -694,7 +768,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
AM.SymbolFlags = CP->getTargetFlags();
- if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
AM = Backup;
return true;
}
@@ -710,7 +784,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
X86ISelAddressMode Backup = AM;
AM.BlockAddr = BA->getBlockAddress();
AM.SymbolFlags = BA->getTargetFlags();
- if (FoldOffsetIntoAddress(BA->getOffset(), AM)) {
+ if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
AM = Backup;
return true;
}
@@ -758,11 +832,10 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
return true;
}
-/// MatchAddress - Add the specified node to the specified addressing mode,
-/// returning true if it cannot be done. This just pattern matches for the
-/// addressing mode.
-bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
- if (MatchAddressRecursively(N, AM, 0))
+/// Add the specified node to the specified addressing mode, returning true if
+/// it cannot be done. This just pattern matches for the addressing mode.
+bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
+ if (matchAddressRecursively(N, AM, 0))
return true;
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
@@ -790,15 +863,49 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
+bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth) {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
+ if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
+ !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
+ !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ return false;
+ AM = Backup;
+
+ // If we couldn't fold both operands into the address at the same time,
+ // see if we can just put each operand into a register and fold at least
+ // the add.
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ !AM.Base_Reg.getNode() &&
+ !AM.IndexReg.getNode()) {
+ N = Handle.getValue();
+ AM.Base_Reg = N.getOperand(0);
+ AM.IndexReg = N.getOperand(1);
+ AM.Scale = 1;
+ return false;
+ }
+ N = Handle.getValue();
+ return true;
+}
+
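As a rough mental model of the fallback at the end of matchAdd: when neither operand folds further, the add itself still maps onto the x86 addressing mode as base plus index with scale 1. The values below are hypothetical, purely to show the resulting address form.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical operand values of an un-foldable (add a, b).
      uint64_t Base = 0x1000, Index = 0x40;
      unsigned Scale = 1;                        // AM.Scale = 1 in the fallback
      uint64_t EffectiveAddr = Base + Index * Scale;
      assert(EffectiveAddr == 0x1040);           // i.e. the pair becomes [base + index]
    }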
// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
-static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
+static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
if (N.getNode()->getNodeId() == -1 ||
N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
- DAG.RepositionNode(Pos.getNode(), N.getNode());
+ DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
N.getNode()->setNodeId(Pos.getNode()->getNodeId());
}
}
@@ -807,7 +914,7 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
// safe. This allows us to convert the shift and and into an h-register
// extract and a scaled index. Returns false if the simplification is
// performed.
-static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
+static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
@@ -835,12 +942,12 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
// these nodes. We continually insert before 'N' in sequence as this is
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no
// hierarchy left to express.
- InsertDAGNode(DAG, N, Eight);
- InsertDAGNode(DAG, N, Srl);
- InsertDAGNode(DAG, N, NewMask);
- InsertDAGNode(DAG, N, And);
- InsertDAGNode(DAG, N, ShlCount);
- InsertDAGNode(DAG, N, Shl);
+ insertDAGNode(DAG, N, Eight);
+ insertDAGNode(DAG, N, Srl);
+ insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, And);
+ insertDAGNode(DAG, N, ShlCount);
+ insertDAGNode(DAG, N, Shl);
DAG.ReplaceAllUsesWith(N, Shl);
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
@@ -850,7 +957,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
-static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
+static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
@@ -880,9 +987,9 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
// these nodes. We continually insert before 'N' in sequence as this is
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no
// hierarchy left to express.
- InsertDAGNode(DAG, N, NewMask);
- InsertDAGNode(DAG, N, NewAnd);
- InsertDAGNode(DAG, N, NewShift);
+ insertDAGNode(DAG, N, NewMask);
+ insertDAGNode(DAG, N, NewAnd);
+ insertDAGNode(DAG, N, NewShift);
DAG.ReplaceAllUsesWith(N, NewShift);
AM.Scale = 1 << ShiftAmt;
@@ -917,7 +1024,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
-static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
+static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
uint64_t Mask,
SDValue Shift, SDValue X,
X86ISelAddressMode &AM) {
@@ -973,7 +1080,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
assert(X.getValueType() != VT);
// We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
- InsertDAGNode(DAG, N, NewX);
+ insertDAGNode(DAG, N, NewX);
X = NewX;
}
SDLoc DL(N);
@@ -987,10 +1094,10 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
// these nodes. We continually insert before 'N' in sequence as this is
// essentially a pre-flattened and pre-sorted sequence of nodes. There is no
// hierarchy left to express.
- InsertDAGNode(DAG, N, NewSRLAmt);
- InsertDAGNode(DAG, N, NewSRL);
- InsertDAGNode(DAG, N, NewSHLAmt);
- InsertDAGNode(DAG, N, NewSHL);
+ insertDAGNode(DAG, N, NewSRLAmt);
+ insertDAGNode(DAG, N, NewSRL);
+ insertDAGNode(DAG, N, NewSHLAmt);
+ insertDAGNode(DAG, N, NewSHL);
DAG.ReplaceAllUsesWith(N, NewSHL);
AM.Scale = 1 << AMShiftAmt;
@@ -998,7 +1105,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
return false;
}
-bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
DEBUG({
@@ -1007,7 +1114,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
});
// Limit recursion.
if (Depth > 5)
- return MatchAddressBase(N, AM);
+ return matchAddressBase(N, AM);
// If this is already a %rip relative address, we can only merge immediates
// into it. Instead of handling this in every case, we handle it here.
@@ -1020,7 +1127,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
return true;
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
- if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+ if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
return false;
return true;
}
@@ -1038,19 +1145,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- if (!FoldOffsetIntoAddress(Val, AM))
+ if (!foldOffsetIntoAddress(Val, AM))
return false;
break;
}
case X86ISD::Wrapper:
case X86ISD::WrapperRIP:
- if (!MatchWrapper(N, AM))
+ if (!matchWrapper(N, AM))
return false;
break;
case ISD::LOAD:
- if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+ if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
return false;
break;
@@ -1087,7 +1194,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
- if (!FoldOffsetIntoAddress(Disp, AM))
+ if (!foldOffsetIntoAddress(Disp, AM))
return false;
}
@@ -1119,7 +1226,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Try to fold the mask and shift into the scale, and return false if we
// succeed.
- if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
+ if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
return false;
break;
}
@@ -1153,7 +1260,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
- if (FoldOffsetIntoAddress(Disp, AM))
+ if (foldOffsetIntoAddress(Disp, AM))
Reg = N.getNode()->getOperand(0);
} else {
Reg = N.getNode()->getOperand(0);
@@ -1179,7 +1286,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
- if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+ if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
@@ -1227,56 +1334,26 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
- InsertDAGNode(*CurDAG, N, Zero);
- InsertDAGNode(*CurDAG, N, Neg);
+ insertDAGNode(*CurDAG, N, Zero);
+ insertDAGNode(*CurDAG, N, Neg);
return false;
}
- case ISD::ADD: {
- // Add an artificial use to this node so that we can keep track of
- // it if it gets CSE'd with a different node.
- HandleSDNode Handle(N);
-
- X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
- !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
- return false;
- AM = Backup;
-
- // Try again after commuting the operands.
- if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
- !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
+ case ISD::ADD:
+ if (!matchAdd(N, AM, Depth))
return false;
- AM = Backup;
-
- // If we couldn't fold both operands into the address at the same time,
- // see if we can just put each operand into a register and fold at least
- // the add.
- if (AM.BaseType == X86ISelAddressMode::RegBase &&
- !AM.Base_Reg.getNode() &&
- !AM.IndexReg.getNode()) {
- N = Handle.getValue();
- AM.Base_Reg = N.getOperand(0);
- AM.IndexReg = N.getOperand(1);
- AM.Scale = 1;
- return false;
- }
- N = Handle.getValue();
break;
- }
case ISD::OR:
- // Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (CurDAG->isBaseWithConstantOffset(N)) {
- X86ISelAddressMode Backup = AM;
- ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
-
- // Start with the LHS as an addr mode.
- if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
- !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
- return false;
- AM = Backup;
- }
+ // We want to look through a transform in InstCombine and DAGCombiner that
+ // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
+ // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
+ // An 'lea' can then be used to match the shift (multiply) and add:
+ // and $1, %esi
+ // lea (%rsi, %rdi, 8), %rax
+ if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
+ !matchAdd(N, AM, Depth))
+ return false;
break;
case ISD::AND: {
@@ -1299,27 +1376,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
uint64_t Mask = N.getConstantOperandVal(1);
// Try to fold the mask and shift into an extract and scale.
- if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+ if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
return false;
// Try to fold the mask and shift directly into the scale.
- if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+ if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
return false;
// Try to swap the mask and shift to place shifts which can be done as
// a scale on the outside of the mask.
- if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
+ if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
return false;
break;
}
}
- return MatchAddressBase(N, AM);
+ return matchAddressBase(N, AM);
}
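The ISD::OR change above leans on a bit-level fact that is easy to verify standalone: when two values share no set bits, OR and ADD produce identical results because no carries can occur. A minimal check, with arbitrary example inputs:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t x = 0x1235, y = 0x777;
      uint64_t a = x & 1;          // only bit 0 can be set
      uint64_t b = y << 3;         // only bits 3 and up can be set
      assert((a & b) == 0);        // disjoint bit sets
      assert((a | b) == (a + b));  // so this 'or' can be matched like an 'add'
    }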
-/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
-bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
// Is the base register already occupied?
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
// If so, check to see if the scale index register is set.
@@ -1339,7 +1416,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
@@ -1362,7 +1439,7 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
// If Base is 0, the whole address is in index and the Scale is 1
if (isa<ConstantSDNode>(Base)) {
- assert(dyn_cast<ConstantSDNode>(Base)->isNullValue() &&
+ assert(cast<ConstantSDNode>(Base)->isNullValue() &&
"Unexpected base in gather/scatter");
Scale = getI8Imm(1, DL);
Base = CurDAG->getRegister(0, MVT::i32);
@@ -1375,14 +1452,14 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
return true;
}
-/// SelectAddr - returns true if it is able pattern match an addressing mode.
+/// Returns true if it is able to pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Parent is the parent node of the addr operand that is being matched. It
/// is always a load, store, atomic node, or null. It is only null when
/// checking memory operands for inline asm nodes.
-bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
X86ISelAddressMode AM;
@@ -1404,7 +1481,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
}
- if (MatchAddress(N, AM))
+ if (matchAddress(N, AM))
return false;
MVT VT = N.getSimpleValueType();
@@ -1420,14 +1497,14 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
return true;
}
-/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
-/// match a load whose top elements are either undef or zeros. The load flavor
-/// is derived from the type of N, which is either v4f32 or v2f64.
+/// Match a scalar SSE load. In particular, we want to match a load whose top
+/// elements are either undef or zeros. The load flavor is derived from the
+/// type of N, which is either v4f32 or v2f64.
///
/// We also return:
/// PatternChainNode: this is the matched node that has a chain input and
/// output.
-bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
+bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
@@ -1439,7 +1516,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
- if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
return true;
}
@@ -1457,7 +1534,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
PatternNodeWithChain = SDValue(LD, 0);
return true;
@@ -1466,7 +1543,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
}
-bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
+bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CN->getZExtValue();
if ((uint32_t)ImmVal != (uint64_t)ImmVal)
@@ -1495,10 +1572,10 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) {
return TM.getCodeModel() == CodeModel::Small;
}
-bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
+bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
- if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment))
+ if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
return false;
SDLoc DL(N);
@@ -1533,9 +1610,9 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base,
return true;
}
-/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
+/// Calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
-bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
+bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
@@ -1546,7 +1623,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
SDValue Copy = AM.Segment;
SDValue T = CurDAG->getRegister(0, MVT::i32);
AM.Segment = T;
- if (MatchAddress(N, AM))
+ if (matchAddress(N, AM))
return false;
assert (T == AM.Segment);
AM.Segment = Copy;
@@ -1572,13 +1649,12 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
Complexity++;
// FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
- // to a LEA. This is determined with some expermentation but is by no means
+ // to a LEA. This is determined with some experimentation but is by no means
// optimal (especially for code size consideration). LEA is nice because of
// its three-address nature. Tweak the cost function again when we can run
// convertToThreeAddress() at register allocation time.
if (AM.hasSymbolicDisplacement()) {
- // For X86-64, we should always use lea to materialize RIP relative
- // addresses.
+ // For X86-64, always use LEA to materialize RIP-relative addresses.
if (Subtarget->is64Bit())
Complexity = 4;
else
@@ -1596,8 +1672,8 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
return true;
}
-/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
-bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
+/// This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
@@ -1621,7 +1697,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
}
-bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
+bool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
@@ -1630,14 +1706,13 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
!IsLegalToFold(N, P, P, OptLevel))
return false;
- return SelectAddr(N.getNode(),
+ return selectAddr(N.getNode(),
N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
-/// getGlobalBaseReg - Return an SDNode that returns the value of
-/// the global base register. Output instructions required to
-/// initialize the global base register, if necessary.
-///
+/// Return an SDNode that returns the value of the global base register.
+/// Output instructions required to initialize the global base register,
+/// if necessary.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
auto &DL = MF->getDataLayout();
@@ -1828,7 +1903,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
return Val;
}
-SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
+SDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) {
if (Node->hasAnyUseOfValue(0))
return nullptr;
@@ -1841,7 +1916,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
SDValue Ptr = Node->getOperand(1);
SDValue Val = Node->getOperand(2);
SDValue Base, Scale, Index, Disp, Segment;
- if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
+ if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
return nullptr;
// Which index into the table.
@@ -1933,9 +2008,9 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
return CurDAG->getMergeValues(RetVals, dl).getNode();
}
-/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
-/// any uses which require the SF or OF bits to be accurate.
-static bool HasNoSignedComparisonUses(SDNode *N) {
+/// Test whether the given X86ISD::CMP node has any uses which require the SF
+/// or OF bits to be accurate.
+static bool hasNoSignedComparisonUses(SDNode *N) {
// Examine each user of the node.
for (SDNode::use_iterator UI = N->use_begin(),
UE = N->use_end(); UI != UE; ++UI) {
@@ -1995,9 +2070,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
return true;
}
-/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
-/// is suitable for doing the {load; increment or decrement; store} to modify
-/// transformation.
+/// Check whether or not the chain ending in StoreNode is suitable for doing
+/// the {load; increment or decrement; store} to modify transformation.
static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue StoredVal, SelectionDAG *CurDAG,
LoadSDNode* &LoadNode, SDValue &InputChain) {
@@ -2081,8 +2155,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
return true;
}
-/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
-/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
+/// Get the appropriate X86 opcode for an in-memory increment or decrement.
+/// Opc should be X86ISD::DEC or X86ISD::INC.
static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
if (Opc == X86ISD::DEC) {
if (LdVT == MVT::i64) return X86::DEC64m;
@@ -2099,9 +2173,8 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
llvm_unreachable("unrecognized size for LdVT");
}
-/// SelectGather - Customized ISel for GATHER operations.
-///
-SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
+/// Customized ISel for GATHER operations.
+SDNode *X86DAGToDAGISel::selectGather(SDNode *Node, unsigned Opc) {
// Operands of Gather: VSrc, Base, VIdx, VMask, Scale
SDValue Chain = Node->getOperand(0);
SDValue VSrc = Node->getOperand(2);
@@ -2148,6 +2221,27 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
default: break;
+ case ISD::BRIND: {
+ if (Subtarget->isTargetNaCl())
+ // NaCl has its own pass where jmp %r32 instructions are converted to
+ // jmp %r64. We leave the instruction alone.
+ break;
+ if (Subtarget->isTarget64BitILP32()) {
+ // Converts a 32-bit register to a 64-bit, zero-extended version of
+ // it. This is needed because x86-64 can do many things, but jmp %r32
+ // ain't one of them.
+ const SDValue &Target = Node->getOperand(1);
+ assert(Target.getSimpleValueType() == llvm::MVT::i32);
+ SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
+ SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
+ Node->getOperand(0), ZextTarget);
+ ReplaceUses(SDValue(Node, 0), Brind);
+ SelectCode(ZextTarget.getNode());
+ SelectCode(Brind.getNode());
+ return nullptr;
+ }
+ break;
+ }
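For the new BRIND handling, the essential point is that a 32-bit jump target must be zero-extended, never sign-extended, before x86-64 can branch through it. A trivial standalone check with a made-up address:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Target32 = 0x80001000u;   // 32-bit code address with the top bit set
      uint64_t ZextTarget = Target32;    // zero-extend: the value is unchanged
      assert(ZextTarget == 0x0000000080001000ull);
      // A sign-extending move would instead produce 0xFFFFFFFF80001000,
      // which is not a valid ILP32 address.
    }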
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
switch (IntNo) {
@@ -2190,7 +2284,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
}
- SDNode *RetVal = SelectGather(Node, Opc);
+ SDNode *RetVal = selectGather(Node, Opc);
if (RetVal)
// We already called ReplaceUses inside SelectGather.
return nullptr;
@@ -2217,7 +2311,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
case ISD::ATOMIC_LOAD_ADD: {
- SDNode *RetVal = SelectAtomicLoadArith(Node, NVT);
+ SDNode *RetVal = selectAtomicLoadArith(Node, NVT);
if (RetVal)
return RetVal;
break;
@@ -2404,10 +2498,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
// Multiply is commutative.
if (!foldedLoad) {
- foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
if (foldedLoad)
std::swap(N0, N1);
}
@@ -2549,7 +2643,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
SDValue InFlag;
@@ -2557,7 +2651,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
- if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
Move =
SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
@@ -2692,7 +2786,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue N1 = Node->getOperand(1);
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
- HasNoSignedComparisonUses(Node))
+ hasNoSignedComparisonUses(Node))
N0 = N0.getOperand(0);
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
@@ -2709,7 +2803,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// For example, convert "testl %eax, $8" to "testb %al, $8"
if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
(!(C->getZExtValue() & 0x80) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Reg = N0.getNode()->getOperand(0);
@@ -2743,7 +2837,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// For example, "testl %eax, $2048" to "testb %ah, $8".
if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
(!(C->getZExtValue() & 0x8000) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
// Shift the immediate right by 8 bits.
SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
dl, MVT::i8);
@@ -2781,7 +2875,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
N0.getValueType() != MVT::i16 &&
(!(C->getZExtValue() & 0x8000) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
MVT::i16);
SDValue Reg = N0.getNode()->getOperand(0);
@@ -2804,7 +2898,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
N0.getValueType() == MVT::i64 &&
(!(C->getZExtValue() & 0x80000000) ||
- HasNoSignedComparisonUses(Node))) {
+ hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
MVT::i32);
SDValue Reg = N0.getNode()->getOperand(0);
@@ -2854,7 +2948,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
break;
SDValue Base, Scale, Index, Disp, Segment;
- if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
+ if (!selectAddr(LoadNode, LoadNode->getBasePtr(),
Base, Scale, Index, Disp, Segment))
break;
@@ -2903,7 +2997,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
case InlineAsm::Constraint_v: // not offsetable ??
case InlineAsm::Constraint_m: // memory
case InlineAsm::Constraint_X:
- if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
+ if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
@@ -2916,9 +3010,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
return false;
}
-/// createX86ISelDag - This pass converts a legalized DAG into a
-/// X86-specific DAG, ready for instruction scheduling.
-///
+/// This pass converts a legalized DAG into an X86-specific DAG,
+/// ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0f29b51..0927c2f 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -67,19 +68,14 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
"rather than promotion."),
cl::Hidden);
-// Forward declarations.
-static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
- SDValue V2);
-
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
- TD = TM.getDataLayout();
+ MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
// Set up the TargetLowering object.
- static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird. It always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
@@ -118,13 +114,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
-
- // The _ftol2 runtime function has an unusual calling conv, which
- // is modeled by a special pseudo-instruction.
- setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
- setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
}
if (Subtarget->isTargetDarwin()) {
@@ -175,14 +164,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512())
+ // f32/f64 are legal, f80 is custom.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ else
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
} else if (!Subtarget->useSoftFloat()) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
- // FILD for other targets.
+ // FILD or VCVTUSI2SS/SD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
@@ -206,23 +199,29 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
}
- // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
- // are Legal, f80 is custom lowered.
- setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
- setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
-
// Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
- if (X86ScalarSSEf32) {
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
- // f32 and f64 cases are Legal, f80 case is not
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ if (!Subtarget->useSoftFloat()) {
+ // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
+ // are Legal, f80 is custom lowered.
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
+ setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
+
+ if (X86ScalarSSEf32) {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ // f32 and f64 cases are Legal, f80 case is not
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ }
} else {
- setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
- setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
+ setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
}
// Handle FP_TO_UINT by promoting the destination to a larger signed
@@ -232,8 +231,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
- setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
+ // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ } else {
+ setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
+ }
} else if (!Subtarget->useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
@@ -242,14 +247,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else
+ // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
- }
- if (isTargetFTOL()) {
- // Use the _ftol2 runtime function, which has a pseudo-instruction
- // to handle its weird calling convention.
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}
@@ -274,8 +276,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
- MVT VT = IntVTs[i];
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -295,6 +296,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BR_CC , MVT::f32, Expand);
setOperationAction(ISD::BR_CC , MVT::f64, Expand);
setOperationAction(ISD::BR_CC , MVT::f80, Expand);
+ setOperationAction(ISD::BR_CC , MVT::f128, Expand);
setOperationAction(ISD::BR_CC , MVT::i8, Expand);
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
@@ -302,6 +304,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
@@ -312,7 +315,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
- setOperationAction(ISD::FREM , MVT::f32 , Expand);
+
+ if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) {
+ // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
+ // is. We should promote the value to 64-bits to solve this.
+ // This is what the CRT headers do - `fmodf` is an inline header
+ // function casting to f64 and calling `fmod`.
+ setOperationAction(ISD::FREM , MVT::f32 , Promote);
+ } else {
+ setOperationAction(ISD::FREM , MVT::f32 , Expand);
+ }
+
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
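
The Promote action added above relies on the fact that, on 32-bit MSVC, `fmodf` is only an inline wrapper that widens to double and calls `fmod`. A small sketch of that equivalence, using a hypothetical `my_fmodf` rather than the actual CRT header:

// Illustrative only: what promoting a 32-bit FREM to f64 amounts to when the
// C runtime lacks a real fmodf.  my_fmodf is a hypothetical stand-in for the
// inline CRT wrapper described in the comment above.
#include <cmath>
#include <cstdio>

static float my_fmodf(float x, float y) {
  // Widen to f64, use the f64 libcall, narrow the result back to f32.
  return static_cast<float>(std::fmod(static_cast<double>(x),
                                      static_cast<double>(y)));
}

int main() {
  std::printf("%f\n", my_fmodf(5.5f, 2.0f)); // prints 1.500000
  return 0;
}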
@@ -404,15 +417,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
+ setOperationAction(ISD::SELECT , MVT::f128 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
+ setOperationAction(ISD::SETCC , MVT::f128 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::SELECT , MVT::i64 , Custom);
setOperationAction(ISD::SETCC , MVT::i64 , Custom);
+ setOperationAction(ISD::SETCCE , MVT::i64 , Custom);
}
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
@@ -456,8 +475,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
// Expand certain atomics
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
- MVT VT = IntVTs[i];
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
@@ -473,13 +491,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
- if (Subtarget->is64Bit()) {
- setExceptionPointerRegister(X86::RAX);
- setExceptionSelectorRegister(X86::RDX);
- } else {
- setExceptionPointerRegister(X86::EAX);
- setExceptionSelectorRegister(X86::EDX);
- }
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
@@ -492,8 +503,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
- if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) {
- // TargetInfo::X86_64ABIBuiltinVaList
+ if (Subtarget->is64Bit()) {
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
} else {
@@ -505,7 +515,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
@@ -613,8 +623,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
- // Long double always uses X87.
+ // Long double always uses X87, except f128 in MMX.
if (!Subtarget->useSoftFloat()) {
+ if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
+ addRegisterClass(MVT::f128, &X86::FR128RegClass);
+ ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+ setOperationAction(ISD::FABS , MVT::f128, Custom);
+ setOperationAction(ISD::FNEG , MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+ }
+
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -846,15 +864,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
+ // ISD::CTTZ v2i64 - scalarization is faster.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster.
+
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-power-of-2 vectors
- if (!isPowerOf2_32(VT.getVectorNumElements()))
- continue;
- // Do not attempt to custom lower non-128-bit vectors
- if (!VT.is128BitVector())
- continue;
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -892,13 +912,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-128-bit vectors
- if (!VT.is128BitVector())
- continue;
-
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v2i64);
setOperationAction(ISD::OR, VT, Promote);
@@ -1036,6 +1050,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
+ if (Subtarget->hasXOP()) {
+ setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+ setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
+ setOperationAction(ISD::ROTL, MVT::v2i64, Custom);
+ setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
+ setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
+ setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
+ setOperationAction(ISD::ROTL, MVT::v4i64, Custom);
+ }
+
if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) {
addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
@@ -1126,7 +1151,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::v8i32, Custom);
setOperationAction(ISD::CTPOP, MVT::v4i64, Custom);
- if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) {
+ setOperationAction(ISD::CTTZ, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
+
+ if (Subtarget->hasAnyFMA()) {
setOperationAction(ISD::FMA, MVT::v8f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f64, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
@@ -1202,6 +1236,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
setOperationAction(ISD::MUL, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SMAX, MVT::v32i8, Custom);
+ setOperationAction(ISD::SMAX, MVT::v16i16, Custom);
+ setOperationAction(ISD::SMAX, MVT::v8i32, Custom);
+ setOperationAction(ISD::UMAX, MVT::v32i8, Custom);
+ setOperationAction(ISD::UMAX, MVT::v16i16, Custom);
+ setOperationAction(ISD::UMAX, MVT::v8i32, Custom);
+ setOperationAction(ISD::SMIN, MVT::v32i8, Custom);
+ setOperationAction(ISD::SMIN, MVT::v16i16, Custom);
+ setOperationAction(ISD::SMIN, MVT::v8i32, Custom);
+ setOperationAction(ISD::UMIN, MVT::v32i8, Custom);
+ setOperationAction(ISD::UMIN, MVT::v16i16, Custom);
+ setOperationAction(ISD::UMIN, MVT::v8i32, Custom);
}
// In the customized shift lowering, the legal cases in AVX2 will be
@@ -1243,15 +1290,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget->hasInt256())
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
-
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- for (int i = MVT::v32i8; i != MVT::v4i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-256-bit vectors
- if (!VT.is256BitVector())
- continue;
-
+ for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::AND, VT, Promote);
AddPromotedToType (ISD::AND, VT, MVT::v4i64);
setOperationAction(ISD::OR, VT, Promote);
@@ -1293,6 +1333,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
setOperationAction(ISD::XOR, MVT::i1, Legal);
setOperationAction(ISD::OR, MVT::i1, Legal);
setOperationAction(ISD::AND, MVT::i1, Legal);
@@ -1311,6 +1352,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v16f32, Custom);
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
@@ -1318,19 +1360,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
- if (Subtarget->is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
- }
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
@@ -1348,12 +1381,62 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
+ setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
+ setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
+ if (Subtarget->hasVLX()){
+ setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
+ setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
+ setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
+
+ setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
+ setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
+ setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
+ } else {
+ setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
+ setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
+ }
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
if (Subtarget->hasDQI()) {
- setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ }
+ }
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
}
setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
@@ -1386,7 +1469,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
@@ -1395,6 +1478,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
@@ -1439,9 +1523,49 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
if (Subtarget->hasCDI()) {
- setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
- }
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand);
+
+ setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
+
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ } else {
+ setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand);
+ }
+ } // Subtarget->hasCDI()
+
if (Subtarget->hasDQI()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
@@ -1455,7 +1579,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::OR, VT, Legal);
setOperationAction(ISD::XOR, VT, Legal);
}
- if (EltSize >= 32 && VT.getSizeInBits() <= 512) {
+ if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) {
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
@@ -1481,15 +1605,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
+ setOperationAction(ISD::MGATHER, VT, Legal);
+ setOperationAction(ISD::MSCATTER, VT, Custom);
}
}
- for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
-
- // Do not attempt to promote non-512-bit vectors.
- if (!VT.is512BitVector())
- continue;
-
+ for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v8i64);
}
@@ -1515,22 +1635,35 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
@@ -1541,19 +1674,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
- for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
- const MVT VT = (MVT::SimpleValueType)i;
+ setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
+ setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
+ if (Subtarget->hasVLX())
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
- const unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (Subtarget->hasCDI()) {
+ setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand);
+ }
- // Do not attempt to promote non-512-bit vectors.
- if (!VT.is512BitVector())
- continue;
+ for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
- if (EltSize < 32) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Legal);
- }
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v8i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v8i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v8i64);
}
}
@@ -1571,6 +1716,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
@@ -1595,8 +1742,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- if (!Subtarget->is64Bit())
+ if (!Subtarget->is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ }
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -1604,9 +1753,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// FIXME: We really should do custom legalization for addition and
// subtraction on x86-32 once PR3203 is fixed. We really can't do much better
// than generic legalization for 64-bit multiplication-with-overflow, though.
- for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) {
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ if (VT == MVT::i64 && !Subtarget->is64Bit())
+ continue;
// Add/Sub/Mul with overflow operations are custom lowered.
- MVT VT = IntVTs[i];
setOperationAction(ISD::SADDO, VT, Custom);
setOperationAction(ISD::UADDO, VT, Custom);
setOperationAction(ISD::SSUBO, VT, Custom);
@@ -1615,7 +1765,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, VT, Custom);
}
-
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::SHL_I128, nullptr);
@@ -1658,12 +1807,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
+ setTargetDAGCombine(ISD::FNEG);
setTargetDAGCombine(ISD::FMA);
+ setTargetDAGCombine(ISD::FMINNUM);
+ setTargetDAGCombine(ISD::FMAXNUM);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::MSTORE);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
@@ -1671,24 +1824,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(ISD::MSCATTER);
+ setTargetDAGCombine(ISD::MGATHER);
computeRegisterProperties(Subtarget->getRegisterInfo());
- // On Darwin, -Os means optimize for size without hurting performance,
- // do not reduce the limit.
MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8;
+ MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
- MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
- MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemmoveOptSize = 4;
setPrefLoopAlignment(4); // 2^4 bytes.
- // Predictable cmov don't hurt on atom because it's in-order.
+ // A predictable cmov does not hurt on an in-order CPU.
+ // FIXME: Use a CPU attribute to trigger this, not a CPU model.
PredictableSelectIsExpensive = !Subtarget->isAtom();
EnableExtLdPromotion = true;
setPrefFunctionAlignment(4); // 2^4 bytes.
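
The store-count limits set above decide when an `@llvm.memset` or `@llvm.memcpy` is expanded inline into a run of stores instead of a libcall. A rough standalone sketch of how such a limit gates the expansion (the helper name and the store-width parameter are illustrative, not the SelectionDAG API):

// Rough sketch: a memset of N bytes is only expanded into individual stores
// when the number of stores at the chosen store width stays under the limits
// set above (16 normally, 8 when optimizing for size).
#include <cstdio>

static bool shouldInlineMemset(unsigned sizeInBytes, unsigned storeWidth,
                               bool optForSize) {
  const unsigned limit = optForSize ? 8 : 16;   // MaxStoresPerMemset(OptSize)
  const unsigned numStores = (sizeInBytes + storeWidth - 1) / storeWidth;
  return numStores <= limit;
}

int main() {
  // 128-byte memset with 16-byte (SSE) stores: 8 stores, inlined either way.
  std::printf("%d\n", shouldInlineMemset(128, 16, true));   // 1
  // 256-byte memset at -Os: 16 stores > 8, call the libc memset instead.
  std::printf("%d\n", shouldInlineMemset(256, 16, true));   // 0
  return 0;
}

The memcpy and memmove limits work the same way, just with 8/4 in place of 16/8.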
@@ -1716,40 +1869,43 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
- const unsigned NumElts = VT.getVectorNumElements();
- const EVT EltVT = VT.getVectorElementType();
- if (VT.is512BitVector()) {
- if (Subtarget->hasAVX512())
- if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
- EltVT == MVT::f32 || EltVT == MVT::f64)
- switch(NumElts) {
- case 8: return MVT::v8i1;
- case 16: return MVT::v16i1;
- }
- if (Subtarget->hasBWI())
- if (EltVT == MVT::i8 || EltVT == MVT::i16)
- switch(NumElts) {
- case 32: return MVT::v32i1;
- case 64: return MVT::v64i1;
- }
- }
+ if (VT.isSimple()) {
+ MVT VVT = VT.getSimpleVT();
+ const unsigned NumElts = VVT.getVectorNumElements();
+ const MVT EltVT = VVT.getVectorElementType();
+ if (VVT.is512BitVector()) {
+ if (Subtarget->hasAVX512())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ }
+ if (Subtarget->hasBWI())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 32: return MVT::v32i1;
+ case 64: return MVT::v64i1;
+ }
+ }
- if (VT.is256BitVector() || VT.is128BitVector()) {
- if (Subtarget->hasVLX())
- if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
- EltVT == MVT::f32 || EltVT == MVT::f64)
- switch(NumElts) {
- case 2: return MVT::v2i1;
- case 4: return MVT::v4i1;
- case 8: return MVT::v8i1;
- }
- if (Subtarget->hasBWI() && Subtarget->hasVLX())
- if (EltVT == MVT::i8 || EltVT == MVT::i16)
- switch(NumElts) {
- case 8: return MVT::v8i1;
- case 16: return MVT::v16i1;
- case 32: return MVT::v32i1;
- }
+ if (VVT.is256BitVector() || VVT.is128BitVector()) {
+ if (Subtarget->hasVLX())
+ if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
+ EltVT == MVT::f32 || EltVT == MVT::f64)
+ switch(NumElts) {
+ case 2: return MVT::v2i1;
+ case 4: return MVT::v4i1;
+ case 8: return MVT::v8i1;
+ }
+ if (Subtarget->hasBWI() && Subtarget->hasVLX())
+ if (EltVT == MVT::i8 || EltVT == MVT::i16)
+ switch(NumElts) {
+ case 8: return MVT::v8i1;
+ case 16: return MVT::v16i1;
+ case 32: return MVT::v32i1;
+ }
+ }
}
return VT.changeVectorElementTypeToInteger();
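
The rewritten getSetCCResultType above picks a vNi1 mask register type for vector compares whenever the subtarget's AVX-512 features allow it, and otherwise falls back to an integer vector of the same shape. A simplified standalone model of that choice, under the assumption that only the feature flags and the vector shape matter (names here are illustrative, not the LLVM API):

// Simplified model of the setcc result-type choice above: with AVX-512 the
// result of a vector compare is a vNi1 mask whose lane count matches the
// compared vector, subject to the feature checks.  Returns 0 when the result
// is just a promoted integer vector instead.
#include <cstdio>

struct Features { bool avx512, bwi, vlx; };

static unsigned maskLanes(unsigned vectorBits, unsigned eltBits, bool eltIsFP,
                          Features f) {
  const unsigned numElts = vectorBits / eltBits;
  const bool wideElt = eltBits >= 32;               // i32/i64/f32/f64
  const bool narrowInt = !eltIsFP && eltBits <= 16; // i8/i16
  if (vectorBits == 512) {
    if (f.avx512 && wideElt) return numElts;        // v8i1 / v16i1
    if (f.bwi && narrowInt) return numElts;         // v32i1 / v64i1
  } else if (vectorBits == 256 || vectorBits == 128) {
    if (f.vlx && wideElt) return numElts;           // v2i1 / v4i1 / v8i1
    if (f.bwi && f.vlx && narrowInt) return numElts; // v8i1 .. v32i1
  }
  return 0;
}

int main() {
  Features skx{true, true, true};
  std::printf("v16f32 -> v%ui1\n", maskLanes(512, 32, true, skx));  // v16i1
  std::printf("v32i16 -> v%ui1\n", maskLanes(512, 16, false, skx)); // v32i1
  std::printf("v4i32  -> v%ui1\n", maskLanes(128, 32, false, skx)); // v4i1
  return 0;
}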
@@ -1769,9 +1925,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ for (auto *EltTy : STy->elements()) {
unsigned EltAlign = 0;
- getMaxByValAlign(STy->getElementType(i), EltAlign);
+ getMaxByValAlign(EltTy, EltAlign);
if (EltAlign > MaxAlign)
MaxAlign = EltAlign;
if (MaxAlign == 16)
@@ -1821,10 +1977,11 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
if ((!IsMemset || ZeroMemset) &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
- (Subtarget->isUnalignedMemAccessFast() ||
+ (!Subtarget->isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
if (Size >= 32) {
+ // FIXME: Check if unaligned 32-byte accesses are slow.
if (Subtarget->hasInt256())
return MVT::v8i32;
if (Subtarget->hasFp256())
@@ -1842,6 +1999,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::f64;
}
}
+ // This is a compromise. If we reach here, unaligned accesses may be slow on
+ // this target. However, creating smaller, aligned accesses could be even
+ // slower and would certainly be a lot more code.
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
return MVT::i32;
@@ -1860,8 +2020,22 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
bool *Fast) const {
- if (Fast)
- *Fast = Subtarget->isUnalignedMemAccessFast();
+ if (Fast) {
+ switch (VT.getSizeInBits()) {
+ default:
+ // 8-byte and under are always assumed to be fast.
+ *Fast = true;
+ break;
+ case 128:
+ *Fast = !Subtarget->isUnalignedMem16Slow();
+ break;
+ case 256:
+ *Fast = !Subtarget->isUnalignedMem32Slow();
+ break;
+ // TODO: What about AVX-512 (512-bit) accesses?
+ }
+ }
+ // Misaligned accesses of any size are always allowed.
return true;
}
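
The hunk above replaces the single isUnalignedMemAccessFast() query with a per-size policy: accesses of 64 bits or less are reported fast, 128-bit accesses depend on isUnalignedMem16Slow(), 256-bit accesses on isUnalignedMem32Slow(), and misaligned accesses stay legal regardless. A standalone sketch of that decision, with plain booleans standing in for the two subtarget flags:

// Standalone sketch of the policy above (the real query takes an EVT and the
// X86Subtarget; these booleans stand in for the two subtarget flags).
#include <cstdio>

static bool unalignedAccessIsFast(unsigned sizeInBits, bool unalignedMem16Slow,
                                  bool unalignedMem32Slow) {
  switch (sizeInBits) {
  case 128: return !unalignedMem16Slow;
  case 256: return !unalignedMem32Slow;
  // TODO in the source: 512-bit accesses; everything 64-bit and under is
  // simply assumed fast.
  default:  return true;
  }
}

int main() {
  // e.g. a core where unaligned 16-byte loads are slow:
  std::printf("128-bit fast? %d\n", unalignedAccessIsFast(128, true, true)); // 0
  std::printf("64-bit  fast? %d\n", unalignedAccessIsFast(64, true, true));  // 1
  return 0;
}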
@@ -1964,6 +2138,32 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
return true;
}
+Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (!Subtarget->isTargetAndroid())
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+
+ // Android provides a fixed TLS slot for the SafeStack pointer. See the
+ // definition of TLS_SLOT_SAFESTACK in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ unsigned AddressSpace, Offset;
+ if (Subtarget->is64Bit()) {
+ // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
+ Offset = 0x48;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x24 on i386
+ Offset = 0x24;
+ AddressSpace = 256;
+ }
+
+ return ConstantExpr::getIntToPtr(
+ ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
+ Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
+}
+
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
assert(SrcAS != DestAS && "Expected different address spaces!");
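
getSafeStackPointerLocation above materializes a segment-relative TLS address for Android's SafeStack pointer: %fs:0x48 on x86-64 (or %gs under the kernel code model) and %gs:0x24 on i386, using LLVM's convention that address space 256 is %gs and 257 is %fs. A small illustrative helper, not the LLVM API, that captures the same selection:

// Illustrative helper: which segment-relative TLS slot the hunk above selects
// for Android's SafeStack unsafe-stack pointer.  On x86, addrspace(256) is
// %gs and addrspace(257) is %fs.
#include <cstdio>

struct TLSSlot { unsigned addressSpace; unsigned offset; };

static TLSSlot safeStackSlot(bool is64Bit, bool kernelCodeModel) {
  if (is64Bit)
    return { kernelCodeModel ? 256u : 257u, 0x48 }; // %gs:0x48 or %fs:0x48
  return { 256u, 0x24 };                            // %gs:0x24 on i386
}

int main() {
  TLSSlot s = safeStackSlot(/*is64Bit=*/true, /*kernelCodeModel=*/false);
  std::printf("addrspace(%u), offset 0x%x\n", s.addressSpace, s.offset);
  return 0;
}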
@@ -1977,11 +2177,9 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
#include "X86GenCallingConv.inc"
-bool
-X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
- MachineFunction &MF, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const {
+bool X86TargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
@@ -2001,6 +2199,9 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ if (CallConv == CallingConv::X86_INTR && !Outs.empty())
+ report_fatal_error("X86 interrupts may not return any value");
+
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
@@ -2025,7 +2226,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
else if (VA.getLocInfo() == CCValAssign::ZExt)
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt) {
- if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1)
+ if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
else
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
@@ -2114,7 +2315,10 @@ X86TargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
+ X86ISD::NodeType opcode = X86ISD::RET_FLAG;
+ if (CallConv == CallingConv::X86_INTR)
+ opcode = X86ISD::IRET;
+ return DAG.getNode(opcode, dl, MVT::Other, RetOps);
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2193,7 +2397,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
EVT CopyVT = VA.getLocVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
- if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
+ if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -2244,28 +2448,28 @@ enum StructReturnType {
StackStructReturn
};
static StructReturnType
-callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
+callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
if (Outs.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
- if (Flags.isInReg())
+ if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
/// Determines whether a function uses struct return semantics.
static StructReturnType
-argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
+argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
if (Ins.empty())
return NotStructReturn;
const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
if (!Flags.isSRet())
return NotStructReturn;
- if (Flags.isInReg())
+ if (Flags.isInReg() || IsMCU)
return RegStructReturn;
return StackStructReturn;
}
@@ -2285,17 +2489,34 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
MachinePointerInfo(), MachinePointerInfo());
}
-/// Return true if the calling convention is one that
-/// supports tail call optimization.
-static bool IsTailCallConvention(CallingConv::ID CC) {
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
- CC == CallingConv::HiPE);
+ CC == CallingConv::HiPE || CC == CallingConv::HHVM);
}
-/// \brief Return true if the calling convention is a C calling convention.
-static bool IsCCallConvention(CallingConv::ID CC) {
- return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 ||
- CC == CallingConv::X86_64_SysV);
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+ switch (CC) {
+ // C calling conventions:
+ case CallingConv::C:
+ case CallingConv::X86_64_Win64:
+ case CallingConv::X86_64_SysV:
+ // Callee pop conventions:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_VectorCall:
+ case CallingConv::X86_FastCall:
+ return true;
+ default:
+ return canGuaranteeTCO(CC);
+ }
+}
+
+/// Return true if the function is being made into a tailcall target by
+/// changing its ABI.
+static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
+ return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
@@ -2306,19 +2527,12 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
CallSite CS(CI);
CallingConv::ID CalleeCC = CS.getCallingConv();
- if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
+ if (!mayTailCallThisCC(CalleeCC))
return false;
return true;
}
-/// Return true if the function is being made into
-/// a tailcall target by changing its ABI.
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
- bool GuaranteedTailCallOpt) {
- return GuaranteedTailCallOpt && IsTailCallConvention(CC);
-}
-
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain,
CallingConv::ID CallConv,
@@ -2329,7 +2543,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
+ bool AlwaysUseMutable = shouldGuaranteeTCO(
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -2344,6 +2558,19 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
else
ValVT = VA.getValVT();
+ // Calculate the SP offset of the interrupt parameter, re-arranging the
+ // slot normally taken by a return address.
+ int Offset = 0;
+ if (CallConv == CallingConv::X86_INTR) {
+ const X86Subtarget& Subtarget =
+ static_cast<const X86Subtarget&>(DAG.getSubtarget());
+ // X86 interrupts may take one or two arguments.
+ // Unlike a regular call, there is no return address on the stack.
+ // The offset of the last argument needs to be set to -4/-8 bytes.
+ // When there are two arguments, the offset of the first one should be 0 bytes.
+ Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
+ }
+
// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
@@ -2352,14 +2579,24 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
+ // Adjust SP offset of interrupt parameter.
+ if (CallConv == CallingConv::X86_INTR) {
+ MFI->setObjectOffset(FI, Offset);
+ }
return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
+ // Adjust SP offset of interrupt parameter.
+ if (CallConv == CallingConv::X86_INTR) {
+ MFI->setObjectOffset(FI, Offset);
+ }
+
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ SDValue Val = DAG.getLoad(
+ ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, false, 0);
return ExtendedInMem ?
DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
}
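
The preceding hunks place X86_INTR arguments at Offset = slotSize * ((i + 1) % Ins.size() - 1): a lone argument (or the second of two) lands at -4/-8, the slot a return address would normally occupy, while the first of two arguments stays at offset 0. A short worked example of the formula (the helper is illustrative only):

// Worked example of the interrupt-argument offset formula used above
// (slotSize is 8 on x86-64 and 4 on i386; i is the argument index, numIns the
// number of formal arguments: one, or two when an error code is present).
#include <cstdio>

static int intrArgOffset(int slotSize, unsigned i, unsigned numIns) {
  return slotSize * (static_cast<int>((i + 1) % numIns) - 1);
}

int main() {
  // One argument: it sits where the return address would be, at -8 (x86-64).
  std::printf("%d\n", intrArgOffset(8, 0, 1));  // -8
  // Two arguments (interrupt frame pointer + error code): offsets 0 and -8.
  std::printf("%d\n", intrArgOffset(8, 0, 2));  // 0
  std::printf("%d\n", intrArgOffset(8, 1, 2));  // -8
  return 0;
}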
@@ -2413,15 +2650,10 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
-SDValue
-X86TargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
+SDValue X86TargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
@@ -2436,9 +2668,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
- assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
+ if (CallConv == CallingConv::X86_INTR) {
+ bool isLegal = Ins.size() == 1 ||
+ (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
+ (!Is64Bit && Ins[1].VT == MVT::i32)));
+ if (!isLegal)
+ report_fatal_error("X86 interrupts may take one or two arguments");
+ }
+
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
@@ -2471,6 +2711,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
+ else if (RegVT == MVT::f128)
+ RC = &X86::FR128RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
@@ -2547,8 +2789,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
- if (FuncIsMadeTailCallSafe(CallConv,
- MF.getTarget().Options.GuaranteedTailCallOpt))
+ if (shouldGuaranteeTCO(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
@@ -2561,13 +2803,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MFI->CreateFixedObject(1, StackSize, true));
}
- MachineModuleInfo &MMI = MF.getMMI();
- const Function *WinEHParent = nullptr;
- if (MMI.hasWinEHFuncInfo(Fn))
- WinEHParent = MMI.getWinEHParent(Fn);
- bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
- bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
-
// Figure out if XMM registers are in use.
assert(!(Subtarget->useSoftFloat() &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
@@ -2631,10 +2866,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
RSFIN, DAG.getIntPtrConstant(Offset, dl));
SDValue Store =
- DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo::getFixedStack(
- FuncInfo->getRegSaveFrameIndex(), Offset),
- false, false, 0);
+ DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(),
+ FuncInfo->getRegSaveFrameIndex(), Offset),
+ false, false, 0);
MemOps.push_back(Store);
Offset += 8;
}
@@ -2656,27 +2892,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- } else if (IsWin64 && IsWinEHOutlined) {
- // Get to the caller-allocated home save location. Add 8 to account
- // for the return address.
- int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
- FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject(
- /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false));
-
- MMI.getWinEHFuncInfo(Fn)
- .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] =
- FuncInfo->getRegSaveFrameIndex();
-
- // Store the second integer parameter (rdx) into rsp+16 relative to the
- // stack pointer at the entry of the function.
- SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy(DAG.getDataLayout()));
- unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
- Chain = DAG.getStore(
- Val.getValue(1), dl, Val, RSFIN,
- MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()),
- /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0);
}
if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
@@ -2723,12 +2938,15 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
+ } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
+ // X86 interrupts must pop the error code if present
+ FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && !IsTailCallConvention(CallConv) &&
+ if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
- argsAreStructReturn(Ins) == StackStructReturn)
+ argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn)
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -2743,21 +2961,20 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setArgumentStackSize(StackSize);
- if (IsWinEHParent) {
- if (Is64Bit) {
- int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
- SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
- MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
- SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
- Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
- MachinePointerInfo::getFixedStack(UnwindHelpFI),
- /*isVolatile=*/true,
- /*isNonTemporal=*/false, /*Alignment=*/0);
- } else {
- // Functions using Win32 EH are considered to have opaque SP adjustments
- // to force local variables to be addressed from the frame or base
- // pointers.
- MFI->setHasOpaqueSPAdjustment(true);
+ if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
+ EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
+ if (Personality == EHPersonality::CoreCLR) {
+ assert(Is64Bit);
+ // TODO: Add a mechanism to frame lowering that will allow us to indicate
+ // that we'd prefer this slot be allocated towards the bottom of the frame
+ // (i.e. near the stack pointer after allocating the frame). Every
+ // funclet needs a copy of this slot in its (mostly empty) frame, and the
+ // offset from the bottom of this and each funclet's frame must be the
+ // same, so the size of funclets' (mostly empty) frames is dictated by
+ // how far this slot is from the bottom (since they allocate just enough
+ // space to accommodate holding this slot at the correct offset).
+ int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
@@ -2777,9 +2994,10 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
- return DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo::getStack(LocMemOffset),
- false, false, 0);
+ return DAG.getStore(
+ Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+ false, false, 0);
}
/// Emit a load of return address if tail call
@@ -2813,11 +3031,24 @@ static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
false);
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
- MachinePointerInfo::getFixedStack(NewReturnAddrFI),
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), NewReturnAddrFI),
false, false, 0);
return Chain;
}
+/// Returns a vector_shuffle mask for an movs{s|d}, movd
+/// operation of specified width.
+static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
+ SDValue V2) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 8> Mask;
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i)
+ Mask.push_back(i);
+ return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
+}
+
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2835,11 +3066,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isCallingConvWin64(CallConv);
- StructReturnType SR = callIsStructReturn(Outs);
+ StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU());
bool IsSibcall = false;
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
+ if (CallConv == CallingConv::X86_INTR)
+ report_fatal_error("X86 interrupts may not be called directly");
+
if (Attr.getValueAsString() == "true")
isTailCall = false;
@@ -2878,7 +3112,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
++NumTailCalls;
}
- assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+ assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
@@ -2892,13 +3126,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
if (IsSibcall)
// This is a sibcall. The memory operands are available in the caller's
// own stack.
NumBytes = 0;
else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
- IsTailCallConvention(CallConv))
+ canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2970,7 +3204,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
break;
case CCValAssign::AExt:
if (Arg.getValueType().isVector() &&
- Arg.getValueType().getScalarType() == MVT::i1)
+ Arg.getValueType().getVectorElementType() == MVT::i1)
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
@@ -2987,9 +3221,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Store the argument.
SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ Chain = DAG.getStore(
+ Chain, dl, Arg, SpillSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, 0);
Arg = SpillSlot;
break;
}
@@ -3125,10 +3360,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Flags, DAG, dl));
} else {
// Store relative to framepointer.
- MemOpChains2.push_back(
- DAG.getStore(ArgChain, dl, Arg, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ MemOpChains2.push_back(DAG.getStore(
+ ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ false, false, 0));
}
}
@@ -3207,7 +3442,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (ExtraLoad)
Callee = DAG.getLoad(
getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false,
+ false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -3261,9 +3497,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
- // If this is an invoke in a 32-bit function using an MSVC personality, assume
- // the function clobbers all registers. If an exception is thrown, the runtime
- // will not restore CSRs.
+ // If this is an invoke in a 32-bit function using a funclet-based
+ // personality, assume the function clobbers all registers. If an exception
+ // is thrown, the runtime will not restore CSRs.
// FIXME: Model this more precisely so that we can register allocate across
// the normal edge and spill and fill across the exceptional edge.
if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
@@ -3272,7 +3508,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallerFn->hasPersonalityFn()
? classifyEHPersonality(CallerFn->getPersonalityFn())
: EHPersonality::Unknown;
- if (isMSVCEHPersonality(Pers))
+ if (isFuncletEHPersonality(Pers))
Mask = RegInfo->getNoPreservedMask();
}
@@ -3300,7 +3536,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
- else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
+ else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
SR == StackStructReturn)
// If this is a call to a struct-return function, the callee
@@ -3358,8 +3594,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// EDI
// local1 ..
-/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned
-/// for a 16 byte align requirement.
+/// Make the stack size aligned, e.g. to 16n + 12 bytes, for a 16-byte
+/// alignment requirement.
unsigned
X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG& DAG) const {
@@ -3380,9 +3616,8 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
return Offset;
}
-/// MatchingStackOffset - Return true if the given stack call argument is
-/// already available in the same position (relatively) of the caller's
-/// incoming argument stack.
+/// Return true if the given stack call argument is already available in the
+/// same position (relatively) of the caller's incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
@@ -3435,25 +3670,19 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}
-/// IsEligibleForTailCallOptimization - Check whether the call is eligible
-/// for tail call optimization. Targets which want to do tail call
-/// optimization should implement this function.
-bool
-X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG &DAG) const {
- if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC))
+/// Check whether the call is eligible for tail call optimization. Targets
+/// that want to do tail call optimization should implement this function.
+bool X86TargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+ bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+ if (!mayTailCallThisCC(CalleeCC))
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
- const MachineFunction &MF = DAG.getMachineFunction();
+ MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = MF.getFunction();
// If the function return type is x86_fp80 and the callee return type is not,
@@ -3474,7 +3703,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
- if (IsTailCallConvention(CalleeCC) && CCMatch)
+ if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
}
@@ -3493,19 +3722,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
- // An stdcall/thiscall caller is expected to clean up its arguments; the
- // callee isn't going to do that.
- // FIXME: this is more restrictive than needed. We could produce a tailcall
- // when the stack adjustment matches. For example, with a thiscall that takes
- // only one argument.
- if (!CCMatch && (CallerCC == CallingConv::X86_StdCall ||
- CallerCC == CallingConv::X86_ThisCall))
- return false;
-
// Do not sibcall optimize vararg calls unless all arguments are passed via
// registers.
if (isVarArg && !Outs.empty()) {
-
// Optimizing for varargs on Win64 is unlikely to be safe without
// additional testing.
if (IsCalleeWin64 || IsCallerWin64)
@@ -3573,6 +3792,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ unsigned StackArgsSize = 0;
+
// If the callee takes no arguments then go on to check the results of the
// call.
if (!Outs.empty()) {
@@ -3587,11 +3808,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CCInfo.AllocateStack(32, 8);
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
- if (CCInfo.getNextStackOffset()) {
- MachineFunction &MF = DAG.getMachineFunction();
- if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn())
- return false;
+ StackArgsSize = CCInfo.getNextStackOffset();
+ if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -3642,6 +3861,21 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
+ bool CalleeWillPop =
+ X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt);
+
+ if (unsigned BytesToPop =
+ MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
+ // If we have bytes to pop, the callee must pop them.
+ bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
+ if (!CalleePopMatches)
+ return false;
+ } else if (CalleeWillPop && StackArgsSize > 0) {
+ // If we don't have bytes to pop, make sure the callee doesn't pop any.
+ return false;
+ }
+
return true;
}
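The new pop-compatibility bookkeeping above can be summarized with a small standalone model; the function and argument names below are invented for illustration and are not the LLVM API.

    #include <cassert>

    // Standalone mirror of the rule added above: if the caller's return pops
    // argument bytes, the callee must pop exactly the same amount; otherwise
    // the callee must not pop anything.
    static bool popsAreCompatible(unsigned CallerBytesToPop, bool CalleeWillPop,
                                  unsigned StackArgsSize) {
      if (CallerBytesToPop)
        return CalleeWillPop && CallerBytesToPop == StackArgsSize;
      return !(CalleeWillPop && StackArgsSize > 0);
    }

    int main() {
      assert(popsAreCompatible(4, true, 4));    // e.g. thiscall -> thiscall, 4 bytes
      assert(!popsAreCompatible(4, true, 8));   // pop sizes differ
      assert(!popsAreCompatible(0, true, 8));   // callee would pop, caller expects 0
    }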
@@ -3688,11 +3922,13 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPERMILPI:
case X86ISD::VPERM2X128:
case X86ISD::VPERMI:
+ case X86ISD::VPERMV:
+ case X86ISD::VPERMV3:
return true;
}
}
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue V1, unsigned TargetMask,
SelectionDAG &DAG) {
switch(Opc) {
@@ -3707,7 +3943,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
}
}
-static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
+static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT,
SDValue V1, SDValue V2, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
@@ -3772,23 +4008,23 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
return false;
}
-/// isCalleePop - Determines whether the callee is required to pop its
-/// own arguments. Callee pop is necessary to support tail calls.
+/// Determines whether the callee is required to pop its own arguments.
+/// Callee pop is necessary to support tail calls.
bool X86::isCalleePop(CallingConv::ID CallingConv,
- bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+ bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
+ // If GuaranteeTCO is true, we force some calls to be callee pop so that we
+ // can guarantee TCO.
+ if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
+ return true;
+
switch (CallingConv) {
default:
return false;
case CallingConv::X86_StdCall:
case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:
+ case CallingConv::X86_VectorCall:
return !is64Bit;
- case CallingConv::Fast:
- case CallingConv::GHC:
- case CallingConv::HiPE:
- if (IsVarArg)
- return false;
- return TailCallOpt;
}
}
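A rough standalone model of the rewritten callee-pop rule, with a made-up enum standing in for CallingConv; it only illustrates the shape of the decision, not the full set of conventions.

    #include <cassert>

    // Simplified model (illustrative only, not the LLVM API).
    enum class CC { C, Fast, StdCall, FastCall, ThisCall, VectorCall };

    static bool calleePops(CC Conv, bool Is64Bit, bool IsVarArg, bool GuaranteeTCO) {
      bool IsTCOConvention = (Conv == CC::Fast);   // stands in for fastcc/GHC/HiPE
      if (!IsVarArg && GuaranteeTCO && IsTCOConvention)
        return true;                               // forced callee pop for TCO
      switch (Conv) {
      case CC::StdCall:
      case CC::FastCall:
      case CC::ThisCall:
      case CC::VectorCall:
        return !Is64Bit;                           // 32-bit callee-cleanup CCs
      default:
        return false;
      }
    }

    int main() {
      assert(calleePops(CC::Fast, false, false, /*GuaranteeTCO=*/true));
      assert(!calleePops(CC::Fast, false, false, /*GuaranteeTCO=*/false));
      assert(!calleePops(CC::StdCall, /*Is64Bit=*/true, false, false));
    }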
@@ -3807,11 +4043,26 @@ static bool isX86CCUnsigned(unsigned X86CC) {
case X86::COND_BE: return true;
case X86::COND_AE: return true;
}
- llvm_unreachable("covered switch fell through?!");
}
-/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
-/// specific condition code, returning the condition code and the LHS/RHS of the
+static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Invalid integer condition!");
+ case ISD::SETEQ: return X86::COND_E;
+ case ISD::SETGT: return X86::COND_G;
+ case ISD::SETGE: return X86::COND_GE;
+ case ISD::SETLT: return X86::COND_L;
+ case ISD::SETLE: return X86::COND_LE;
+ case ISD::SETNE: return X86::COND_NE;
+ case ISD::SETULT: return X86::COND_B;
+ case ISD::SETUGT: return X86::COND_A;
+ case ISD::SETULE: return X86::COND_BE;
+ case ISD::SETUGE: return X86::COND_AE;
+ }
+}
+
+/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
+/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
@@ -3833,19 +4084,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
}
}
- switch (SetCCOpcode) {
- default: llvm_unreachable("Invalid integer condition!");
- case ISD::SETEQ: return X86::COND_E;
- case ISD::SETGT: return X86::COND_G;
- case ISD::SETGE: return X86::COND_GE;
- case ISD::SETLT: return X86::COND_L;
- case ISD::SETLE: return X86::COND_LE;
- case ISD::SETNE: return X86::COND_NE;
- case ISD::SETULT: return X86::COND_B;
- case ISD::SETUGT: return X86::COND_A;
- case ISD::SETULE: return X86::COND_BE;
- case ISD::SETUGE: return X86::COND_AE;
- }
+ return TranslateIntegerX86CC(SetCCOpcode);
}
// First determine if it is required or is profitable to flip the operands.
@@ -3898,8 +4137,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP,
}
}
-/// hasFPCMov - is there a floating point cmov for the specific X86 condition
-/// code. Current x86 isa includes the following FP cmov instructions:
+/// Is there a floating point cmov for the specific X86 condition code?
+/// The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
switch (X86CC) {
@@ -3917,7 +4156,7 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
-/// isFPImmLegal - Returns true if the target can instruction select the
+/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
@@ -3970,7 +4209,7 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasLZCNT();
}
-/// isUndefInRange - Return true if every element in Mask, beginning
+/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size is undef.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
@@ -3979,19 +4218,18 @@ static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
return true;
}
-/// isUndefOrInRange - Return true if Val is undef or if its value falls within
-/// the specified range (L, H].
+/// Return true if Val is undef or if its value falls within the
+/// specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
}
-/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
-/// specified value.
+/// Val is either less than zero (undef) or equal to the specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return (Val < 0 || Val == CmpVal);
}
-/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
+/// Return true if every element in Mask, beginning
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range [Low, Low+Size), or is undef.
static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
@@ -4002,9 +4240,8 @@ static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
return true;
}
-/// isVEXTRACTIndex - Return true if the specified
-/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
-/// suitable for instruction that extract 128 or 256 bit vectors
+/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
+/// extract that is suitable for instructions that extract 128- or 256-bit vectors.
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width");
if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
@@ -4021,7 +4258,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
return Result;
}
-/// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR
+/// Return true if the specified INSERT_SUBVECTOR
/// operand specifies a subvector insert that is suitable for input to
/// insertion of 128 or 256-bit subvectors
static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
@@ -4057,8 +4294,8 @@ bool X86::isVEXTRACT256Index(SDNode *N) {
static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
- if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
- llvm_unreachable("Illegal extract subvector for VEXTRACT");
+ assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
+ "Illegal extract subvector for VEXTRACT");
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
@@ -4072,8 +4309,8 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width");
- if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
- llvm_unreachable("Illegal insert subvector for VINSERT");
+ assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
+ "Illegal insert subvector for VINSERT");
uint64_t Index =
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
@@ -4085,53 +4322,71 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
return Index / NumElemsPerChunk;
}
-/// getExtractVEXTRACT128Immediate - Return the appropriate immediate
-/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128
-/// and VINSERTI128 instructions.
+/// Return the appropriate immediate to extract the specified
+/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VEXTRACTI128 instructions.
unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 128);
}
-/// getExtractVEXTRACT256Immediate - Return the appropriate immediate
-/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4
-/// and VINSERTI64x4 instructions.
+/// Return the appropriate immediate to extract the specified
+/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VEXTRACTI64x4 instructions.
unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
return getExtractVEXTRACTImmediate(N, 256);
}
-/// getInsertVINSERT128Immediate - Return the appropriate immediate
-/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128
-/// and VINSERTI128 instructions.
+/// Return the appropriate immediate to insert at the specified
+/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 128);
}
-/// getInsertVINSERT256Immediate - Return the appropriate immediate
-/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF46x4
-/// and VINSERTI64x4 instructions.
+/// Return the appropriate immediate to insert at the specified
+/// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
-/// isZero - Returns true if Elt is a constant integer zero
-static bool isZero(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isNullValue();
-}
-
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
+/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- if (isZero(Elt))
- return true;
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
- return CFP->getValueAPF().isPosZero();
- return false;
+ return isNullConstant(Elt) || isNullFPConstant(Elt);
}
-/// getZeroVector - Returns a vector of specified type with all zero elements.
-///
-static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
+// Build a vector of constants.
+// Use an UNDEF node if MaskElt == -1.
+// Split 64-bit constants in 32-bit mode.
+static SDValue getConstVector(ArrayRef<int> Values, MVT VT,
+ SelectionDAG &DAG,
+ SDLoc dl, bool IsMask = false) {
+
+ SmallVector<SDValue, 32> Ops;
+ bool Split = false;
+
+ MVT ConstVecVT = VT;
+ unsigned NumElts = VT.getVectorNumElements();
+ bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
+ if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
+ ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
+ Split = true;
+ }
+
+ MVT EltVT = ConstVecVT.getVectorElementType();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ bool IsUndef = Values[i] < 0 && IsMask;
+ SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
+ DAG.getConstant(Values[i], dl, EltVT);
+ Ops.push_back(OpNode);
+ if (Split)
+ Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
+ DAG.getConstant(0, dl, EltVT));
+ }
+ SDValue ConstsNode = DAG.getNode(ISD::BUILD_VECTOR, dl, ConstVecVT, Ops);
+ if (Split)
+ ConstsNode = DAG.getBitcast(VT, ConstsNode);
+ return ConstsNode;
+}
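A minimal standalone sketch of the splitting behaviour described in the comment above, assuming small constant values; it mirrors the (low, high) emission order without using SelectionDAG.

    #include <cstdio>
    #include <vector>

    // Model of the 64-bit split path: when i64 is not legal, each lane is
    // emitted as (low 32 bits, 0), or as two undefs (-1 here) for an undef lane.
    static std::vector<long long> splitTo32(const std::vector<long long> &Vals) {
      std::vector<long long> Ops;
      for (long long V : Vals) {
        bool IsUndef = V < 0;                // IsMask semantics: negative = undef
        Ops.push_back(IsUndef ? -1 : V);     // low half
        Ops.push_back(IsUndef ? -1 : 0);     // high half is zero
      }
      return Ops;
    }

    int main() {
      for (long long V : splitTo32({1, -1}))
        std::printf("%lld ", V);             // prints: 1 0 -1 -1
      std::printf("\n");
    }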
+
+/// Returns a vector of specified type with all zero elements.
+static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget,
SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
@@ -4163,7 +4418,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
- } else if (VT.getScalarType() == MVT::i1) {
+ } else if (VT.getVectorElementType() == MVT::i1) {
assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16)
&& "Unexpected vector type");
@@ -4195,19 +4450,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
- * ElemsPerChunk);
+ // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1);
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
- ElemsPerChunk));
+ makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
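A quick standalone check that the new bit-clearing form matches the old divide-and-multiply form for a sample index, assuming 32-bit elements and 128-bit chunks:

    #include <cassert>

    int main() {
      // Extracting a 128-bit chunk from a vector of 32-bit elements:
      // ElemsPerChunk is 4, so clearing the low bits of IdxVal matches the old
      // divide/multiply normalization.
      unsigned EltBits = 32, VectorWidth = 128, IdxVal = 5;
      unsigned ElemsPerChunk = VectorWidth / EltBits;                  // 4
      unsigned OldForm = ((IdxVal * EltBits) / VectorWidth) * ElemsPerChunk;
      unsigned NewForm = IdxVal & ~(ElemsPerChunk - 1);
      assert(OldForm == 4 && NewForm == 4);
    }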
@@ -4245,13 +4499,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,
// Insert the relevant vectorWidth bits.
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
// This is the index of the first element of the vectorWidth-bit chunk
- // we want.
- unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
- * ElemsPerChunk);
+ // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1);
- SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+ SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
@@ -4279,7 +4533,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
Vec, ZeroIndex);
// The blend instruction, and therefore its mask, depend on the data type.
- MVT ScalarType = ResultVT.getScalarType().getSimpleVT();
+ MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT();
if (ScalarType.isFloatingPoint()) {
// Choose either vblendps (float) or vblendpd (double).
unsigned ScalarSize = ScalarType.getSizeInBits();
@@ -4316,6 +4570,81 @@ static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
}
+/// Insert an i1 subvector into an i1 vector.
+static SDValue Insert1BitVector(SDValue Op, SelectionDAG &DAG) {
+
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue SubVec = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+
+ if (!isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
+ return Op;
+
+ MVT OpVT = Op.getSimpleValueType();
+ MVT SubVecVT = SubVec.getSimpleValueType();
+ unsigned NumElems = OpVT.getVectorNumElements();
+ unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
+
+ assert(IdxVal + SubVecNumElems <= NumElems &&
+ IdxVal % SubVecVT.getSizeInBits() == 0 &&
+ "Unexpected index value in INSERT_SUBVECTOR");
+
+ // There are 3 possible cases:
+ // 1. Subvector should be inserted in the lower part (IdxVal == 0)
+ // 2. Subvector should be inserted in the upper part
+ // (IdxVal + SubVecNumElems == NumElems)
+ // 3. Subvector should be inserted in the middle (for example v2i1
+ // to v16i1, index 2)
+
+ SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
+ SDValue Undef = DAG.getUNDEF(OpVT);
+ SDValue WideSubVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, SubVec, ZeroIdx);
+ if (Vec.isUndef())
+ return DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+
+ if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
+ unsigned ShiftLeft = NumElems - SubVecNumElems;
+ unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
+ WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ return ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, OpVT, WideSubVec,
+ DAG.getConstant(ShiftRight, dl, MVT::i8)) : WideSubVec;
+ }
+
+ if (IdxVal == 0) {
+ // Zero lower bits of the Vec
+ SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ // Merge them together
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+ }
+
+ // Simple case when we put subvector in the upper part
+ if (IdxVal + SubVecNumElems == NumElems) {
+ // Zero upper bits of the Vec
+ WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+ return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+ }
+ // Subvector should be inserted in the middle - use shuffle
+ SmallVector<int, 64> Mask;
+ for (unsigned i = 0; i < NumElems; ++i)
+ Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
+ i : i + NumElems);
+ return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
+}
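To illustrate the all-zero-destination path above, here is a standalone bit-level model (lane i mapped to bit i); the shift amounts follow the ShiftLeft/ShiftRight computation in the patch.

    #include <cassert>
    #include <cstdint>

    int main() {
      // A v2i1 subvector inserted at index 2 of a zero v8i1: it is shifted to
      // the top of the mask register and back down so only lanes 2..3 carry
      // the subvector bits.
      unsigned NumElems = 8, SubVecNumElems = 2, IdxVal = 2;
      uint8_t Sub = 0b11;                                         // both lanes set
      unsigned ShiftLeft  = NumElems - SubVecNumElems;            // 6
      unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;   // 4
      uint8_t Wide = (uint8_t)(Sub << ShiftLeft);                 // lanes 6..7
      uint8_t Res  = (uint8_t)(Wide >> ShiftRight);               // lanes 2..3
      assert(Res == 0b00001100);
    }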
+
/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
@@ -4334,18 +4663,22 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}
-/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in an <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
- SDLoc dl) {
+static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, SDLoc dl) {
assert(VT.isVector() && "Expected a vector type");
SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
SDValue Vec;
- if (VT.is256BitVector()) {
- if (HasInt256) { // AVX2
+ if (VT.is512BitVector()) {
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+ Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
+ } else if (VT.is256BitVector()) {
+ if (Subtarget->hasInt256()) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
} else { // AVX
@@ -4360,19 +4693,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
return DAG.getBitcast(VT, Vec);
}
-/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
-/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
- SDValue V2) {
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> Mask;
- Mask.push_back(NumElems);
- for (unsigned i = 1; i != NumElems; ++i)
- Mask.push_back(i);
- return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
-}
-
-/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
+/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
@@ -4384,7 +4705,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
+/// Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
SDValue V2) {
unsigned NumElems = VT.getVectorNumElements();
@@ -4396,10 +4717,10 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
-/// vector of zero or undef vector. This produces a shuffle where the low
-/// element of V2 is swizzled into the zero/undef vector, landing at element
-/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
+/// Return a vector_shuffle of the specified vector of zero or undef vector.
+/// This produces a shuffle where the low element of V2 is swizzled into the
+/// zero/undef vector, landing at element Idx.
+/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
bool IsZero,
const X86Subtarget *Subtarget,
@@ -4415,10 +4736,10 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]);
}
-/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
-/// target specific opcode. Returns true if the Mask could be calculated. Sets
-/// IsUnary to true if only uses one source. Note that this will set IsUnary for
-/// shuffles which use a single input multiple times, and in those cases it will
+/// Calculates the shuffle mask corresponding to the target-specific opcode.
+/// Returns true if the Mask could be calculated. Sets IsUnary to true if only
+/// uses one source. Note that this will set IsUnary for shuffles which use a
+/// single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
@@ -4482,7 +4803,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
// If we have a build-vector, then things are easy.
- EVT VT = MaskNode.getValueType();
+ MVT VT = MaskNode.getSimpleValueType();
assert(VT.isVector() &&
"Can't produce a non-vector with a build_vector!");
if (!VT.isInteger())
@@ -4572,6 +4893,119 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::MOVLPS:
// Not yet implemented
return false;
+ case X86ISD::VPERMV: {
+ IsUnary = true;
+ SDValue MaskNode = N->getOperand(0);
+ while (MaskNode->getOpcode() == ISD::BITCAST)
+ MaskNode = MaskNode->getOperand(0);
+
+ unsigned MaskLoBits = Log2_64(VT.getVectorNumElements());
+ SmallVector<uint64_t, 32> RawMask;
+ if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
+ // If we have a build-vector, then things are easy.
+ assert(MaskNode.getSimpleValueType().isInteger() &&
+ MaskNode.getSimpleValueType().getVectorNumElements() ==
+ VT.getVectorNumElements());
+
+ for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) {
+ SDValue Op = MaskNode->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF)
+ RawMask.push_back((uint64_t)SM_SentinelUndef);
+ else if (isa<ConstantSDNode>(Op)) {
+ APInt MaskElement = cast<ConstantSDNode>(Op)->getAPIntValue();
+ RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue());
+ } else
+ return false;
+ }
+ DecodeVPERMVMask(RawMask, Mask);
+ break;
+ }
+ if (MaskNode->getOpcode() == X86ISD::VBROADCAST) {
+ unsigned NumEltsInMask = MaskNode->getNumOperands();
+ MaskNode = MaskNode->getOperand(0);
+ if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode)) {
+ APInt MaskEltValue = CN->getAPIntValue();
+ for (unsigned i = 0; i < NumEltsInMask; ++i)
+ RawMask.push_back(MaskEltValue.getLoBits(MaskLoBits).getZExtValue());
+ DecodeVPERMVMask(RawMask, Mask);
+ break;
+ }
+ // It may be a scalar load
+ }
+
+ auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
+ if (!MaskLoad)
+ return false;
+
+ SDValue Ptr = MaskLoad->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr->getOperand(0);
+
+ auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
+ return false;
+
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
+ DecodeVPERMVMask(C, VT, Mask);
+ if (Mask.empty())
+ return false;
+ break;
+ }
+ return false;
+ }
+ case X86ISD::VPERMV3: {
+ IsUnary = false;
+ SDValue MaskNode = N->getOperand(1);
+ while (MaskNode->getOpcode() == ISD::BITCAST)
+ MaskNode = MaskNode->getOperand(1);
+
+ if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) {
+ // If we have a build-vector, then things are easy.
+ assert(MaskNode.getSimpleValueType().isInteger() &&
+ MaskNode.getSimpleValueType().getVectorNumElements() ==
+ VT.getVectorNumElements());
+
+ SmallVector<uint64_t, 32> RawMask;
+ unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()*2);
+
+ for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) {
+ SDValue Op = MaskNode->getOperand(i);
+ if (Op->getOpcode() == ISD::UNDEF)
+ RawMask.push_back((uint64_t)SM_SentinelUndef);
+ else {
+ auto *CN = dyn_cast<ConstantSDNode>(Op.getNode());
+ if (!CN)
+ return false;
+ APInt MaskElement = CN->getAPIntValue();
+ RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue());
+ }
+ }
+ DecodeVPERMV3Mask(RawMask, Mask);
+ break;
+ }
+
+ auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
+ if (!MaskLoad)
+ return false;
+
+ SDValue Ptr = MaskLoad->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr->getOperand(0);
+
+ auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
+ return false;
+
+ if (auto *C = dyn_cast<Constant>(MaskCP->getConstVal())) {
+ DecodeVPERMV3Mask(C, VT, Mask);
+ if (Mask.empty())
+ return false;
+ break;
+ }
+ return false;
+ }
default: llvm_unreachable("unknown target shuffle node");
}
@@ -4586,7 +5020,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
return true;
}
-/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
@@ -4650,8 +5084,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
return SDValue();
}
-/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
-///
+/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
@@ -4721,8 +5154,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
return DAG.getBitcast(MVT::v16i8, V);
}
-/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
-///
+/// Custom lower build_vector of v8i16.
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
@@ -4753,7 +5185,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
return V;
}
-/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
+/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget,
const TargetLowering &TLI) {
@@ -4924,7 +5356,7 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) {
return SDValue();
if ((Offset % RequiredAlign) & 3)
return SDValue();
- int64_t StartOffset = Offset & ~(RequiredAlign-1);
+ int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
if (StartOffset) {
SDLoc DL(Ptr);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -5157,8 +5589,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
// TODO: If multiple splats are generated to load the same constant,
// it may be detrimental to overall size. There needs to be a way to detect
// that condition to know if this is truly a size win.
- const Function *F = DAG.getMachineFunction().getFunction();
- bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// Handle broadcasting a single constant scalar from the constant pool
// into a vector.
@@ -5188,9 +5619,10 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
SDValue CP =
DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
- Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ Ld = DAG.getLoad(
+ CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false,
+ false, false, Alignment);
return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
}
@@ -5329,7 +5761,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
return NV;
}
-static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) {
+static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector");
@@ -5366,7 +5798,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
}
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
+ SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getBitcast(VT, Imm);
SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
@@ -5600,7 +6032,7 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
/// node.
static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- EVT VT = BV->getValueType(0);
+ MVT VT = BV->getSimpleValueType(0);
if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
(!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
return SDValue();
@@ -5662,12 +6094,12 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
// Update InVec0 and InVec1.
if (InVec0.getOpcode() == ISD::UNDEF) {
InVec0 = Op0.getOperand(0);
- if (InVec0.getValueType() != VT)
+ if (InVec0.getSimpleValueType() != VT)
return SDValue();
}
if (InVec1.getOpcode() == ISD::UNDEF) {
InVec1 = Op1.getOperand(0);
- if (InVec1.getValueType() != VT)
+ if (InVec1.getSimpleValueType() != VT)
return SDValue();
}
@@ -5703,7 +6135,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- EVT VT = BV->getValueType(0);
+ MVT VT = BV->getSimpleValueType(0);
unsigned NumElts = VT.getVectorNumElements();
unsigned NumUndefsLO = 0;
unsigned NumUndefsHI = 0;
@@ -5845,7 +6277,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned NumElems = Op.getNumOperands();
// Generate vectors for predicate vectors.
- if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512())
+ if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG);
// Vectors containing all zeros can be matched by pxor and xorps later
@@ -5866,7 +6298,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return Op;
if (!VT.is512BitVector())
- return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
+ return getOnesVector(VT, Subtarget, DAG, dl);
}
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
@@ -5881,7 +6313,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned NumZero = 0;
unsigned NumNonZero = 0;
- unsigned NonZeros = 0;
+ uint64_t NonZeros = 0;
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
for (unsigned i = 0; i < NumElems; ++i) {
@@ -5895,7 +6327,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (X86::isZeroNode(Elt))
NumZero++;
else {
- NonZeros |= (1 << i);
+ assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range.
+ NonZeros |= ((uint64_t)1 << i);
NumNonZero++;
}
}
@@ -5919,7 +6352,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
- EVT VecVT = MVT::v4i32;
+ MVT VecVT = MVT::v4i32;
// Truncate the value (which may itself be a constant) to i32, and
// convert it to a vector with movd (S2V+shuffle to zero extend).
@@ -6051,7 +6484,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// One half is zero or undef.
unsigned Idx = countTrailingZeros(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
- Op.getOperand(Idx));
+ Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
@@ -6059,13 +6492,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
- if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this))
+ if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
+ DAG, Subtarget, *this))
return V;
if (EVTBits == 16 && NumElems == 8)
- if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- Subtarget, *this))
+ if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
+ DAG, Subtarget, *this))
return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
@@ -6077,7 +6510,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SmallVector<SDValue, 8> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
- bool isZero = !(NonZeros & (1 << i));
+ bool isZero = !(NonZeros & (1ULL << i));
if (isZero)
V[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
@@ -6177,7 +6610,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction
+// 256-bit AVX can use the vinsertf128 instruction
// to create 256-bit vectors from two other 128-bit ones.
static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -6193,8 +6626,8 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
if (Op.getNumOperands() == 4) {
- MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
- ResVT.getVectorNumElements()/2);
+ MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
+ ResVT.getVectorNumElements()/2);
SDValue V3 = Op.getOperand(2);
SDValue V4 = Op.getOperand(3);
return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl),
@@ -6213,8 +6646,27 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
assert(isPowerOf2_32(NumOfOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
+ SDValue Undef = DAG.getUNDEF(ResVT);
if (NumOfOperands > 2) {
- MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(),
+ // Specialize the cases when all, or all but one, of the operands are undef.
+ unsigned NumOfDefinedOps = 0;
+ unsigned OpIdx = 0;
+ for (unsigned i = 0; i < NumOfOperands; i++)
+ if (!Op.getOperand(i).isUndef()) {
+ NumOfDefinedOps++;
+ OpIdx = i;
+ }
+ if (NumOfDefinedOps == 0)
+ return Undef;
+ if (NumOfDefinedOps == 1) {
+ unsigned SubVecNumElts =
+ Op.getOperand(OpIdx).getValueType().getVectorNumElements();
+ SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef,
+ Op.getOperand(OpIdx), IdxVal);
+ }
+
+ MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(),
ResVT.getVectorNumElements()/2);
SmallVector<SDValue, 2> Ops;
for (unsigned i = 0; i < NumOfOperands/2; i++)
@@ -6227,31 +6679,38 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
+ // 2 operands
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
+ unsigned NumElems = ResVT.getVectorNumElements();
+ assert(V1.getValueType() == V2.getValueType() &&
+ V1.getValueType().getVectorNumElements() == NumElems/2 &&
+ "Unexpected operands in CONCAT_VECTORS");
+
+ if (ResVT.getSizeInBits() >= 16)
+ return Op; // The operation is legal with KUNPCK
+
bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode());
bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode());
-
+ SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl);
if (IsZeroV1 && IsZeroV2)
- return getZeroVector(ResVT, Subtarget, DAG, dl);
+ return ZeroVec;
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
- SDValue Undef = DAG.getUNDEF(ResVT);
- unsigned NumElems = ResVT.getVectorNumElements();
- SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8);
+ if (V2.isUndef())
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
+ if (IsZeroV2)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx);
+
+ SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl);
+ if (V1.isUndef())
+ V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
- V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx);
- V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits);
if (IsZeroV1)
- return V2;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal);
V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx);
- // Zero the upper bits of V1
- V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits);
- V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits);
- if (IsZeroV2)
- return V1;
- return DAG.getNode(ISD::OR, dl, ResVT, V1, V2);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal);
}
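A standalone bit-level picture of the two-operand case handled above, for a pair of v4i1 operands; lanes are modeled as bits with lane 0 at bit 0.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Concatenating two v4i1 operands into v8i1: V1 lands in lanes 0..3 and
      // V2 in lanes 4..7, matching the two INSERT_SUBVECTOR placements.
      uint8_t V1 = 0b0101, V2 = 0b0011;
      unsigned NumElems = 8;
      uint8_t Res = (uint8_t)(V1 | (V2 << (NumElems / 2)));
      assert(Res == 0b00110101);
    }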
static SDValue LowerCONCAT_VECTORS(SDValue Op,
@@ -6272,7 +6731,6 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op,
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
-
//===----------------------------------------------------------------------===//
// Vector shuffle lowering
//
@@ -6422,6 +6880,127 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
return DAG.getConstant(Imm, DL, MVT::i8);
}
+/// \brief Compute whether each element of a shuffle is zeroable.
+///
+/// A "zeroable" vector shuffle element is one which can be lowered to zero.
+/// Either it is an undef element in the shuffle mask, the element of the input
+/// referenced is undef, or the element of the input referenced is known to be
+/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
+/// as many lanes with this technique as possible to simplify the remaining
+/// shuffle.
+static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2) {
+ SmallBitVector Zeroable(Mask.size(), false);
+
+ while (V1.getOpcode() == ISD::BITCAST)
+ V1 = V1->getOperand(0);
+ while (V2.getOpcode() == ISD::BITCAST)
+ V2 = V2->getOperand(0);
+
+ bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ int M = Mask[i];
+ // Handle the easy cases.
+ if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
+ Zeroable[i] = true;
+ continue;
+ }
+
+ // If this is an index into a build_vector node (which has the same number
+ // of elements), dig out the input value and use it.
+ SDValue V = M < Size ? V1 : V2;
+ if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
+ continue;
+
+ SDValue Input = V.getOperand(M % Size);
+ // The UNDEF opcode check really should be dead code here, but not quite
+ // worth asserting on (it isn't invalid, just unexpected).
+ if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
+ Zeroable[i] = true;
+ }
+
+ return Zeroable;
+}
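A simplified standalone model of the common cases handled here (undef mask entries and all-zero operands); it ignores the build_vector digging that the real code also performs.

    #include <cassert>
    #include <vector>

    // An element is zeroable if its mask entry is undef (< 0) or if it indexes
    // into an operand known to be all zeros.
    static std::vector<bool> zeroable(const std::vector<int> &Mask,
                                      bool V1IsZero, bool V2IsZero) {
      int Size = (int)Mask.size();
      std::vector<bool> Z(Size, false);
      for (int i = 0; i < Size; ++i) {
        int M = Mask[i];
        Z[i] = M < 0 || (M < Size && V1IsZero) || (M >= Size && V2IsZero);
      }
      return Z;
    }

    int main() {
      auto Z = zeroable({0, -1, 5, 2}, /*V1IsZero=*/false, /*V2IsZero=*/true);
      assert(!Z[0] && Z[1] && Z[2] && !Z[3]);
    }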
+
+// X86 has dedicated unpack instructions that can handle specific blend
+// operations: UNPCKH and UNPCKL.
+static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+ SmallVector<int, 8> Unpckl;
+ SmallVector<int, 8> Unpckh;
+
+ for (int i = 0; i < NumElts; ++i) {
+ unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+ int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2);
+ int HiPos = LoPos + NumEltsInLane / 2;
+ Unpckl.push_back(LoPos);
+ Unpckh.push_back(HiPos);
+ }
+
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
+
+ // Commute and try again.
+ ShuffleVectorSDNode::commuteMask(Unpckl);
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
+ return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1);
+
+ ShuffleVectorSDNode::commuteMask(Unpckh);
+ if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1);
+
+ return SDValue();
+}
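For a concrete picture, these are the masks the helper compares against, evaluated standalone for a 4-element, single-lane type such as v4i32:

    #include <cassert>
    #include <vector>

    int main() {
      int NumElts = 4, NumEltsInLane = 4;
      std::vector<int> Unpckl, Unpckh;
      for (int i = 0; i < NumElts; ++i) {
        int LaneStart = (i / NumEltsInLane) * NumEltsInLane;
        int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2);
        Unpckl.push_back(LoPos);
        Unpckh.push_back(LoPos + NumEltsInLane / 2);
      }
      // UNPCKL interleaves the low halves, UNPCKH the high halves.
      assert((Unpckl == std::vector<int>{0, 4, 1, 5}));
      assert((Unpckh == std::vector<int>{2, 6, 3, 7}));
    }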
+
+/// \brief Try to emit a bitmask instruction for a shuffle.
+///
+/// This handles cases where we can model a blend exactly as a bitmask due to
+/// one of the inputs being zeroable.
+static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ MVT EltVT = VT.getVectorElementType();
+ int NumEltBits = EltVT.getSizeInBits();
+ MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
+ SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
+ SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
+ IntEltVT);
+ if (EltVT.isFloatingPoint()) {
+ Zero = DAG.getBitcast(EltVT, Zero);
+ AllOnes = DAG.getBitcast(EltVT, AllOnes);
+ }
+ SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ SDValue V;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Zeroable[i])
+ continue;
+ if (Mask[i] % Size != i)
+ return SDValue(); // Not a blend.
+ if (!V)
+ V = Mask[i] < Size ? V1 : V2;
+ else if (V != (Mask[i] < Size ? V1 : V2))
+ return SDValue(); // Can only let one input through the mask.
+
+ VMaskOps[i] = AllOnes;
+ }
+ if (!V)
+ return SDValue(); // No non-zeroable elements!
+
+ SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
+ V = DAG.getNode(VT.isFloatingPoint()
+ ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
+ DL, VT, V, VMask);
+ return V;
+}
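A standalone worked example of the blend-as-bitmask idea, assuming V1 is a zero vector; the loop mirrors the identity-lane and single-input checks above.

    #include <cassert>
    #include <vector>

    int main() {
      // With V1 all zeros, the mask {0, 5, 6, 3} keeps only elements 1 and 2
      // of V2, so the shuffle reduces to V2 & {0, -1, -1, 0}.
      std::vector<int> Mask = {0, 5, 6, 3};
      std::vector<bool> Zeroable = {true, false, false, true};   // lanes 0 and 3
      int Size = (int)Mask.size();
      std::vector<int> AndMask(Size, 0);
      for (int i = 0; i < Size; ++i) {
        if (Zeroable[i])
          continue;
        assert(Mask[i] % Size == i && Mask[i] >= Size);   // identity lane, from V2
        AndMask[i] = -1;
      }
      assert((AndMask == std::vector<int>{0, -1, -1, 0}));
    }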
+
/// \brief Try to emit a blend instruction for a shuffle using bit math.
///
/// This is used as a fallback approach when first class blend instructions are
@@ -6431,7 +7010,7 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
- MVT EltVT = VT.getScalarType();
+ MVT EltVT = VT.getVectorElementType();
int NumEltBits = EltVT.getSizeInBits();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
@@ -6458,22 +7037,62 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1,
/// This doesn't do any checks for the availability of instructions for blending
/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
/// be matched in the backend with the type given. What it does check for is
-/// that the shuffle mask is in fact a blend.
+/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
+ SDValue V2, ArrayRef<int> Original,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
+ bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+ SmallVector<int, 8> Mask(Original.begin(), Original.end());
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ bool ForceV1Zero = false, ForceV2Zero = false;
+
+ // Attempt to generate the binary blend mask. If an input is zero then
+ // we can use any lane.
+ // TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
unsigned BlendMask = 0;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Mask[i] >= Size) {
- if (Mask[i] != i + Size)
- return SDValue(); // Shuffled V2 input!
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ if (M == i)
+ continue;
+ if (M == i + Size) {
BlendMask |= 1u << i;
continue;
}
- if (Mask[i] >= 0 && Mask[i] != i)
- return SDValue(); // Shuffled V1 input!
+ if (Zeroable[i]) {
+ if (V1IsZero) {
+ ForceV1Zero = true;
+ Mask[i] = i;
+ continue;
+ }
+ if (V2IsZero) {
+ ForceV2Zero = true;
+ BlendMask |= 1u << i;
+ Mask[i] = i + Size;
+ continue;
+ }
+ }
+ return SDValue(); // Shuffled input!
}
+
+ // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
+ if (ForceV1Zero)
+ V1 = getZeroVector(VT, Subtarget, DAG, DL);
+ if (ForceV2Zero)
+ V2 = getZeroVector(VT, Subtarget, DAG, DL);
+
+ auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) {
+ unsigned ScaledMask = 0;
+ for (int i = 0; i != Size; ++i)
+ if (BlendMask & (1u << i))
+ for (int j = 0; j != Scale; ++j)
+ ScaledMask |= 1u << (i * Scale + j);
+ return ScaledMask;
+ };
+
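A standalone evaluation of the ScaleBlendMask helper for a 4-element mask widened by a factor of 2, matching the dword-scaling use in the cases below:

    #include <cassert>

    int main() {
      // Each selected element bit is replicated Scale times, e.g. when a
      // 64-bit element blend is emitted as a 32-bit VPBLENDD.
      unsigned BlendMask = 0b0101, ScaledMask = 0;
      int Size = 4, Scale = 2;
      for (int i = 0; i != Size; ++i)
        if (BlendMask & (1u << i))
          for (int j = 0; j != Scale; ++j)
            ScaledMask |= 1u << (i * Scale + j);
      assert(ScaledMask == 0b00110011);
    }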
switch (VT.SimpleTy) {
case MVT::v2f64:
case MVT::v4f32:
@@ -6493,12 +7112,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
if (Subtarget->hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
- BlendMask = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i)
- if (Mask[i] >= Size)
- for (int j = 0; j < Scale; ++j)
- BlendMask |= 1u << (i * Scale + j);
-
+ BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
@@ -6511,12 +7125,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
int Scale = 8 / VT.getVectorNumElements();
- BlendMask = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i)
- if (Mask[i] >= Size)
- for (int j = 0; j < Scale; ++j)
- BlendMask |= 1u << (i * Scale + j);
-
+ BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(VT,
@@ -6541,9 +7150,13 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1,
// FALLTHROUGH
case MVT::v16i8:
case MVT::v32i8: {
- assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) &&
+ assert((VT.is128BitVector() || Subtarget->hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
+ if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG))
+ return Masked;
+
// Scale the blend by the number of bytes per element.
int Scale = VT.getScalarSizeInBits() / 8;
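The ScaleBlendMask helper used in the hunks above widens a per-element blend immediate when the blend is performed at a coarser element type; a minimal standalone sketch with plain unsigned integers (hypothetical function name):

#include <cassert>

// Widen a per-element blend immediate: each original mask bit i expands to
// Scale consecutive bits covering the wider element's sub-elements.
static unsigned scaleBlendMask(unsigned BlendMask, int Size, int Scale) {
  unsigned Scaled = 0;
  for (int i = 0; i != Size; ++i)
    if (BlendMask & (1u << i))
      for (int j = 0; j != Scale; ++j)
        Scaled |= 1u << (i * Scale + j);
  return Scaled;
}

int main() {
  // A v4i64 blend 0b0101 performed as a v8i32 blend (Scale = 2).
  assert(scaleBlendMask(0b0101u, 4, 2) == 0b00110011u);
}
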
@@ -6760,11 +7373,11 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
Hi = DAG.getBitcast(AlignVT, Hi);
return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo,
+ VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi,
DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
}
- assert(VT.getSizeInBits() == 128 &&
+ assert(VT.is128BitVector() &&
"Rotate-based lowering only supports 128-bit lowering!");
assert(Mask.size() <= 16 &&
"Can shuffle at most 16 bytes in a 128-bit vector!");
@@ -6785,92 +7398,6 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift));
}
-/// \brief Compute whether each element of a shuffle is zeroable.
-///
-/// A "zeroable" vector shuffle element is one which can be lowered to zero.
-/// Either it is an undef element in the shuffle mask, the element of the input
-/// referenced is undef, or the element of the input referenced is known to be
-/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
-/// as many lanes with this technique as possible to simplify the remaining
-/// shuffle.
-static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2) {
- SmallBitVector Zeroable(Mask.size(), false);
-
- while (V1.getOpcode() == ISD::BITCAST)
- V1 = V1->getOperand(0);
- while (V2.getOpcode() == ISD::BITCAST)
- V2 = V2->getOperand(0);
-
- bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
- bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
-
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- int M = Mask[i];
- // Handle the easy cases.
- if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
- Zeroable[i] = true;
- continue;
- }
-
- // If this is an index into a build_vector node (which has the same number
- // of elements), dig out the input value and use it.
- SDValue V = M < Size ? V1 : V2;
- if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
- continue;
-
- SDValue Input = V.getOperand(M % Size);
- // The UNDEF opcode check really should be dead code here, but not quite
- // worth asserting on (it isn't invalid, just unexpected).
- if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
- Zeroable[i] = true;
- }
-
- return Zeroable;
-}
-
-/// \brief Try to emit a bitmask instruction for a shuffle.
-///
-/// This handles cases where we can model a blend exactly as a bitmask due to
-/// one of the inputs being zeroable.
-static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
- MVT EltVT = VT.getScalarType();
- int NumEltBits = EltVT.getSizeInBits();
- MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
- SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
- SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
- IntEltVT);
- if (EltVT.isFloatingPoint()) {
- Zero = DAG.getBitcast(EltVT, Zero);
- AllOnes = DAG.getBitcast(EltVT, AllOnes);
- }
- SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- SDValue V;
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- if (Zeroable[i])
- continue;
- if (Mask[i] % Size != i)
- return SDValue(); // Not a blend.
- if (!V)
- V = Mask[i] < Size ? V1 : V2;
- else if (V != (Mask[i] < Size ? V1 : V2))
- return SDValue(); // Can only let one input through the mask.
-
- VMaskOps[i] = AllOnes;
- }
- if (!V)
- return SDValue(); // No non-zeroable elements!
-
- SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
- V = DAG.getNode(VT.isFloatingPoint()
- ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
- DL, VT, V, VMask);
- return V;
-}
-
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -6982,7 +7509,7 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
// Determine the extraction length from the part of the
// lower half that isn't zeroable.
int Len = HalfSize;
- for (; Len >= 0; --Len)
+ for (; Len > 0; --Len)
if (!Zeroable[Len - 1])
break;
assert(Len > 0 && "Zeroable shuffle mask");
@@ -6997,8 +7524,9 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
SDValue &V = (M < Size ? V1 : V2);
M = M % Size;
- // All mask elements must be in the lower half.
- if (M > HalfSize)
+ // The extracted elements must start at a valid index and all mask
+ // elements must be in the lower half.
+ if (i > M || M >= HalfSize)
return SDValue();
if (Idx < 0 || (Src == V && Idx == (M - i))) {
@@ -7095,64 +7623,104 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
-/// features of the subtarget.
+/// features of the subtarget. The extended elements are consecutive and
+/// can start at an offset element index in the input; to avoid excess
+/// shuffling, the offset must either be in the bottom lane or at the start
+/// of a higher lane. All extended elements must come from the same lane.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
+ SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
- int NumElements = VT.getVectorNumElements();
int EltBits = VT.getScalarSizeInBits();
+ int NumElements = VT.getVectorNumElements();
+ int NumEltsPerLane = 128 / EltBits;
+ int OffsetLane = Offset / NumEltsPerLane;
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Only 8, 16, and 32 bit elements can be extended.");
assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
+ assert(0 <= Offset && "Extension offset must be non-negative.");
+ assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
+ "Extension offset must be in the first lane or start an upper lane.");
+
+ // Check that an index is in the same lane as the base offset.
+ auto SafeOffset = [&](int Idx) {
+ return OffsetLane == (Idx / NumEltsPerLane);
+ };
+
+ // Shift along an input so that the offset base moves to the first element.
+ auto ShuffleOffset = [&](SDValue V) {
+ if (!Offset)
+ return V;
+
+ SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
+ for (int i = 0; i * Scale < NumElements; ++i) {
+ int SrcIdx = i + Offset;
+ ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
+ }
+ return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
+ };
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget->hasSSE41()) {
+ // Not worth offsetting 128-bit vectors if scale == 2; a pattern using
+ // PUNPCK will catch this in a later shuffle match.
+ if (Offset && Scale == 2 && VT.is128BitVector())
+ return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+ InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, ShuffleOffset(InputV));
+ return DAG.getBitcast(VT, InputV);
}
+ assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
+
// For any extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
if (AnyExt && EltBits == 32) {
- int PSHUFDMask[4] = {0, -1, 1, -1};
+ int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
+ -1};
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
}
if (AnyExt && EltBits == 16 && Scale > 2) {
- int PSHUFDMask[4] = {0, -1, 0, -1};
+ int PSHUFDMask[4] = {Offset / 2, -1,
+ SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
DAG.getBitcast(MVT::v4i32, InputV),
getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
- int PSHUFHWMask[4] = {1, -1, -1, -1};
+ int PSHUFWMask[4] = {1, -1, -1, -1};
+ unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+ VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
DAG.getBitcast(MVT::v8i16, InputV),
- getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
+ getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
}
// The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
// to 64-bits.
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
- assert(VT.getSizeInBits() == 128 && "Unexpected vector width!");
+ assert(VT.is128BitVector() && "Unexpected vector width!");
+ int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
- DAG.getConstant(0, DL, MVT::i8)));
- if (isUndefInRange(Mask, NumElements/2, NumElements/2))
+ DAG.getConstant(LoIdx, DL, MVT::i8)));
+
+ if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) ||
+ !SafeOffset(Offset + 1))
return DAG.getNode(ISD::BITCAST, DL, VT, Lo);
- SDValue Hi =
- DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
- DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
- DAG.getConstant(EltBits, DL, MVT::i8),
- DAG.getConstant(EltBits, DL, MVT::i8)));
+ int HiIdx = (Offset + 1) * EltBits;
+ SDValue Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(HiIdx, DL, MVT::i8)));
return DAG.getNode(ISD::BITCAST, DL, VT,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
}
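The ShuffleOffset/SafeOffset pair above only moves the extension base to element 0 when the source indices stay inside the offset's 128-bit lane. A small standalone sketch of that mask construction, with plain ints and hypothetical names:

#include <cassert>
#include <vector>

// Build the pre-shuffle mask that moves the extension base 'Offset' down to
// element 0, dropping any source index that would leave the offset's
// 128-bit lane (modelled here by NumEltsPerLane).
static std::vector<int> offsetShuffleMask(int NumElements, int Scale,
                                          int Offset, int NumEltsPerLane) {
  std::vector<int> ShMask(NumElements, -1);
  int OffsetLane = Offset / NumEltsPerLane;
  for (int i = 0; i * Scale < NumElements; ++i) {
    int SrcIdx = i + Offset;
    ShMask[i] = (SrcIdx / NumEltsPerLane == OffsetLane) ? SrcIdx : -1;
  }
  return ShMask;
}

int main() {
  // v8i16, Scale 2, Offset 2: elements 2..5 move down to positions 0..3.
  std::vector<int> M = offsetShuffleMask(8, 2, 2, 8);
  assert(M[0] == 2 && M[1] == 3 && M[2] == 4 && M[3] == 5 && M[4] == -1);
}
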
@@ -7163,9 +7731,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) {
assert(NumElements == 16 && "Unexpected byte vector width!");
SDValue PSHUFBMask[16];
- for (int i = 0; i < 16; ++i)
- PSHUFBMask[i] =
- DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8);
+ for (int i = 0; i < 16; ++i) {
+ int Idx = Offset + (i / Scale);
+ PSHUFBMask[i] = DAG.getConstant(
+ (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8);
+ }
InputV = DAG.getBitcast(MVT::v16i8, InputV);
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
@@ -7173,13 +7743,30 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
MVT::v16i8, PSHUFBMask)));
}
+ // If we are extending from an offset, ensure we start on a boundary that
+ // we can unpack from.
+ int AlignToUnpack = Offset % (NumElements / Scale);
+ if (AlignToUnpack) {
+ SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
+ for (int i = AlignToUnpack; i < NumElements; ++i)
+ ShMask[i - AlignToUnpack] = i;
+ InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
+ Offset -= AlignToUnpack;
+ }
+
// Otherwise emit a sequence of unpacks.
do {
+ unsigned UnpackLoHi = X86ISD::UNPCKL;
+ if (Offset >= (NumElements / 2)) {
+ UnpackLoHi = X86ISD::UNPCKH;
+ Offset -= (NumElements / 2);
+ }
+
MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
: getZeroVector(InputVT, Subtarget, DAG, DL);
InputV = DAG.getBitcast(InputVT, InputV);
- InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext);
+ InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
Scale /= 2;
EltBits *= 2;
NumElements /= 2;
@@ -7205,7 +7792,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
int Bits = VT.getSizeInBits();
+ int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
+ int NumEltsPerLane = NumElements / NumLanes;
assert(VT.getScalarSizeInBits() <= 32 &&
"Exceeds 32-bit integer zero extension limit");
assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");
@@ -7215,8 +7804,11 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
auto Lower = [&](int Scale) -> SDValue {
SDValue InputV;
bool AnyExt = true;
+ int Offset = 0;
+ int Matches = 0;
for (int i = 0; i < NumElements; ++i) {
- if (Mask[i] == -1)
+ int M = Mask[i];
+ if (M == -1)
continue; // Valid anywhere but doesn't tell us anything.
if (i % Scale != 0) {
// Each of the extended elements need to be zeroable.
@@ -7230,14 +7822,29 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
// Each of the base elements needs to be consecutive indices into the
// same input vector.
- SDValue V = Mask[i] < NumElements ? V1 : V2;
- if (!InputV)
+ SDValue V = M < NumElements ? V1 : V2;
+ M = M % NumElements;
+ if (!InputV) {
InputV = V;
- else if (InputV != V)
+ Offset = M - (i / Scale);
+ } else if (InputV != V)
return SDValue(); // Flip-flopping inputs.
- if (Mask[i] % NumElements != i / Scale)
+ // Offset must start in the lowest 128-bit lane or at the start of an
+ // upper lane.
+ // FIXME: Is it ever worth allowing a negative base offset?
+ if (!((0 <= Offset && Offset < NumEltsPerLane) ||
+ (Offset % NumEltsPerLane) == 0))
+ return SDValue();
+
+ // If we are offsetting, all referenced entries must come from the same
+ // lane.
+ if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
+ return SDValue();
+
+ if ((M % NumElements) != (Offset + (i / Scale)))
return SDValue(); // Non-consecutive strided elements.
+ Matches++;
}
// If we fail to find an input, we have a zero-shuffle which should always
@@ -7246,8 +7853,13 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
if (!InputV)
return SDValue();
+ // If we are offsetting, don't extend if we only match a single input; we
+ // can always do better by using a basic PSHUF or PUNPCK.
+ if (Offset != 0 && Matches < 2)
+ return SDValue();
+
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG);
+ DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG);
};
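A standalone re-derivation of the matching rules the Lower lambda applies, simplified to a single input and a single 128-bit lane, so the cross-lane and two-input checks of the real code are omitted; names are hypothetical:

#include <cassert>
#include <vector>

// Check whether Mask is a zero/any-extension of a single input with stride
// 'Scale' whose base starts at element 'Offset'. Zeroable[i] marks lanes
// that may legally be filled with zeros.
static bool matchesZExt(const std::vector<int> &Mask,
                        const std::vector<bool> &Zeroable, int Scale,
                        int Offset) {
  for (int i = 0, Size = (int)Mask.size(); i < Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;                  // undef is valid anywhere
    if (i % Scale != 0) {
      if (!Zeroable[i])
        return false;            // stretched slots must be zeroable
      continue;
    }
    if (M != Offset + i / Scale)
      return false;              // base elements must be consecutive
  }
  return true;
}

int main() {
  // v8i16 mask <2,z,3,z,4,z,5,z> where the 'z' lanes are known-zero: this is
  // a x2 zero-extension of elements 2..5 (Offset = 2).
  std::vector<int> Mask = {2, 9, 3, 11, 4, 13, 5, 15};
  std::vector<bool> Zero = {false, true, false, true, false, true, false, true};
  assert(matchesZExt(Mask, Zero, /*Scale=*/2, /*Offset=*/2));
  assert(!matchesZExt(Mask, Zero, /*Scale=*/2, /*Offset=*/0));
  assert(!matchesZExt(Mask, std::vector<bool>(8, false), 2, 2));
}
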
// The widest scale possible for extending is to a 64-bit integer.
@@ -7355,8 +7967,9 @@ static SDValue lowerVectorShuffleAsElementInsertion(
// all the smarts here sunk into that routine. However, the current
// lowering of BUILD_VECTOR makes that nearly impossible until the old
// vector shuffle lowering is dead.
- if (SDValue V2S = getScalarValueForVectorElement(
- V2, Mask[V2Index] - Mask.size(), DAG)) {
+ SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
+ DAG);
+ if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
@@ -7431,11 +8044,65 @@ static SDValue lowerVectorShuffleAsElementInsertion(
return V2;
}
+/// \brief Try to lower a broadcast of a single (truncated) integer element,
+/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements.
+///
+/// This assumes we have AVX2.
+static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0,
+ int BroadcastIdx,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(Subtarget->hasAVX2() &&
+ "We can only lower integer broadcasts with AVX2!");
+
+ EVT EltVT = VT.getVectorElementType();
+ EVT V0VT = V0.getValueType();
+
+ assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
+ assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");
+
+ EVT V0EltVT = V0VT.getVectorElementType();
+ if (!V0EltVT.isInteger())
+ return SDValue();
+
+ const unsigned EltSize = EltVT.getSizeInBits();
+ const unsigned V0EltSize = V0EltVT.getSizeInBits();
+
+ // This is only a truncation if the original element type is larger.
+ if (V0EltSize <= EltSize)
+ return SDValue();
+
+ assert(((V0EltSize % EltSize) == 0) &&
+ "Scalar type sizes must all be powers of 2 on x86!");
+
+ const unsigned V0Opc = V0.getOpcode();
+ const unsigned Scale = V0EltSize / EltSize;
+ const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
+
+ if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
+ V0Opc != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ SDValue Scalar = V0.getOperand(V0BroadcastIdx);
+
+ // If we're extracting non-least-significant bits, shift so we can truncate.
+ // Hopefully, we can fold away the trunc/srl/load into the broadcast.
+ // Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer
+ // vpbroadcast+vmovd+shr to vpshufb(m)+vmovd.
+ if (const int OffsetIdx = BroadcastIdx % Scale)
+ Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
+ DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType()));
+
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
+ DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
+}
+
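The index and shift arithmetic behind the truncated broadcast can be sketched with plain integers (hypothetical helper name; little-endian element order assumed, as on x86):

#include <cassert>
#include <cstdint>

// Which narrow element a broadcast of lane 'BroadcastIdx' would splat, given
// a vector of wider integer elements.
static uint64_t truncBroadcastScalar(const uint64_t *WideElts,
                                     unsigned WideBits, unsigned NarrowBits,
                                     unsigned BroadcastIdx) {
  unsigned Scale = WideBits / NarrowBits;          // narrow elts per wide elt
  uint64_t Wide = WideElts[BroadcastIdx / Scale];  // wide element to read
  unsigned Shift = (BroadcastIdx % Scale) * NarrowBits;
  uint64_t Mask = NarrowBits == 64 ? ~0ULL : ((1ULL << NarrowBits) - 1);
  return (Wide >> Shift) & Mask;                   // srl + truncate
}

int main() {
  uint64_t V[2] = {0x1122334455667788ULL, 0x99aabbccddeeff00ULL};
  // i32 element 1 of the <4 x i32> view of V is the top half of V[0].
  assert(truncBroadcastScalar(V, 64, 32, 1) == 0x11223344ULL);
  // i16 element 5 of the <8 x i16> view of V is the second i16 of V[1].
  assert(truncBroadcastScalar(V, 64, 16, 5) == 0xddeeULL);
}
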
/// \brief Try to lower broadcast of a single element.
///
/// For convenience, this code also bundles all of the subtarget feature set
/// filtering. While a little annoying to re-dispatch on type here, there isn't
/// a convenient way to factor it out.
+/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them?
static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
ArrayRef<int> Mask,
const X86Subtarget *Subtarget,
@@ -7476,7 +8143,7 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
int BeginIdx = (int)ConstantIdx->getZExtValue();
int EndIdx =
- BeginIdx + (int)VInner.getValueType().getVectorNumElements();
+ BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements();
if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
BroadcastIdx -= BeginIdx;
V = VInner;
@@ -7491,6 +8158,15 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
// Check if this is a broadcast of a scalar. We special case lowering
// for scalars so that we can more effectively fold with loads.
+ // First, look through bitcast: if the original value has a larger element
+ // type than the shuffle, the broadcast element is in essence truncated.
+ // Make that explicit to ease folding.
+ if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
+ if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
+ DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
+ return TruncBroadcast;
+
+ // Also check the simpler case, where we can directly reuse the scalar.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
@@ -7499,6 +8175,20 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
// Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
+ } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+ // If we are broadcasting a load that is only used by the shuffle
+ // then we can reduce the vector load to the broadcasted scalar load.
+ LoadSDNode *Ld = cast<LoadSDNode>(V);
+ SDValue BaseAddr = Ld->getOperand(1);
+ EVT AddrVT = BaseAddr.getValueType();
+ EVT SVT = VT.getScalarType();
+ unsigned Offset = BroadcastIdx * SVT.getStoreSize();
+ SDValue NewAddr = DAG.getNode(
+ ISD::ADD, DL, AddrVT, BaseAddr,
+ DAG.getConstant(Offset, DL, AddrVT));
+ V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
+ DAG.getMachineFunction().getMachineMemOperand(
+ Ld->getMemOperand(), Offset, SVT.getStoreSize()));
} else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
// We can't broadcast from a vector register without AVX2, and we can only
// broadcast from the zero-element of a vector register.
@@ -7595,9 +8285,10 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
/// because for floating point vectors we have a generalized SHUFPS lowering
/// strategy that handles everything that doesn't *exactly* match an unpack,
/// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(SDLoc DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() &&
"This routine only supports integer vectors.");
assert(!isSingleInputShuffleMask(Mask) &&
@@ -7774,10 +8465,9 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
+ return V;
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
@@ -7869,10 +8559,9 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
@@ -8077,14 +8766,9 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
+ return V;
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
@@ -8161,14 +8845,9 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
// It's more profitable for pre-SSSE3 to use shuffles/unpacks.
@@ -8184,8 +8863,8 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
Mask, DAG);
// Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack =
- lowerVectorShuffleAsUnpack(DL, MVT::v4i32, V1, V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1,
+ V2, Mask, DAG))
return Unpack;
// We implement this with SHUFPS because it can blend from two vectors.
@@ -8218,7 +8897,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
SDLoc DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- assert(VT.getScalarType() == MVT::i16 && "Bad input type!");
+ assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
@@ -8286,16 +8965,18 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
assert(AToAInputs.size() + BToAInputs.size() == 4 &&
"Must call this with either 3:1 or 1:3 inputs (summing to 4).");
+ bool ThreeAInputs = AToAInputs.size() == 3;
+
// Compute the index of dword with only one word among the three inputs in
// a half by taking the sum of the half with three inputs and subtracting
// the sum of the actual three inputs. The difference is the remaining
// slot.
int ADWord, BDWord;
- int &TripleDWord = AToAInputs.size() == 3 ? ADWord : BDWord;
- int &OneInputDWord = AToAInputs.size() == 3 ? BDWord : ADWord;
- int TripleInputOffset = AToAInputs.size() == 3 ? AOffset : BOffset;
- ArrayRef<int> TripleInputs = AToAInputs.size() == 3 ? AToAInputs : BToAInputs;
- int OneInput = AToAInputs.size() == 3 ? BToAInputs[0] : AToAInputs[0];
+ int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
+ int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
+ int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
+ ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
+ int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
int TripleNonInputIdx =
TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
@@ -8364,8 +9045,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
} else {
assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
- int APinnedIdx =
- AToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
+ int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
}
}
@@ -8751,10 +9431,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V1, Mask, {0, 0, 1, 1, 2, 2, 3, 3}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V1);
- if (isShuffleEquivalent(V1, V1, Mask, {4, 4, 5, 5, 6, 6, 7, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1,
@@ -8798,10 +9477,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 2, 10, 3, 11}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 12, 5, 13, 6, 14, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
+ return V;
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
@@ -8812,8 +9490,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
return BitBlend;
- if (SDValue Unpack =
- lowerVectorShuffleAsUnpack(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1,
+ V2, Mask, DAG))
return Unpack;
// If we can't directly blend but can use PSHUFB, that will be better as it
@@ -9037,17 +9715,14 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return V;
}
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return Masked;
+
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {// Low half.
- 0, 16, 1, 17, 2, 18, 3, 19,
- // High half.
- 4, 20, 5, 21, 6, 22, 7, 23}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {// Low half.
- 8, 24, 9, 25, 10, 26, 11, 27,
- // High half.
- 12, 28, 13, 29, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
+ return V;
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
@@ -9086,8 +9761,8 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
// FIXME: It might be worth trying to detect if the unpack-feeding
// shuffles will both be pshufb, in which case we shouldn't bother with
// this.
- if (SDValue Unpack =
- lowerVectorShuffleAsUnpack(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+ DL, MVT::v16i8, V1, V2, Mask, DAG))
return Unpack;
}
@@ -9296,7 +9971,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
int NumElements = VT.getVectorNumElements();
int SplitNumElements = NumElements / 2;
- MVT ScalarVT = VT.getScalarType();
+ MVT ScalarVT = VT.getVectorElementType();
MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2);
// Rather than splitting build-vectors, just build two narrower build
@@ -9308,7 +9983,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1,
MVT OrigVT = V.getSimpleValueType();
int OrigNumElements = OrigVT.getVectorNumElements();
int OrigSplitNumElements = OrigNumElements / 2;
- MVT OrigScalarVT = OrigVT.getScalarType();
+ MVT OrigScalarVT = OrigVT.getVectorElementType();
MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2);
SDValue LoV, HiV;
@@ -9478,7 +10153,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
// FIXME: This should probably be generalized for 512-bit vectors as well.
- assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!");
+ assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
int LaneSize = Mask.size() / 2;
// If there are only inputs from one 128-bit lane, splitting will in fact be
@@ -9682,6 +10357,108 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
}
+/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
+/// This allows for fast cases such as subvector extraction/insertion
+/// or shuffling smaller vector types which can lower more efficiently.
+static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts);
+
+ bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts);
+ bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts);
+ if (!UndefLower && !UndefUpper)
+ return SDValue();
+
+ // Upper half is undef and lower half is whole upper subvector.
+ // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
+ if (UndefUpper &&
+ isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
+ // Lower half is undef and upper half is whole lower subvector.
+ // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+ if (UndefLower &&
+ isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+ DAG.getIntPtrConstant(HalfNumElts, DL));
+ }
+
+ // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
+ if (UndefLower && Subtarget->hasAVX2() &&
+ (VT == MVT::v4f64 || VT == MVT::v4i64))
+ return SDValue();
+
+ // If the shuffle only uses the lower halves of the input operands,
+ // then extract them and perform the 'half' shuffle at half width.
+ // e.g. vector_shuffle <X, X, X, X, u, u, u, u> or <X, X, u, u>
+ int HalfIdx1 = -1, HalfIdx2 = -1;
+ SmallVector<int, 8> HalfMask;
+ unsigned Offset = UndefLower ? HalfNumElts : 0;
+ for (unsigned i = 0; i != HalfNumElts; ++i) {
+ int M = Mask[i + Offset];
+ if (M < 0) {
+ HalfMask.push_back(M);
+ continue;
+ }
+
+ // Determine which of the 4 half vectors this element is from.
+ // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
+ int HalfIdx = M / HalfNumElts;
+
+ // Only shuffle using the lower halves of the inputs.
+ // TODO: Investigate usefulness of shuffling with upper halves.
+ if (HalfIdx != 0 && HalfIdx != 2)
+ return SDValue();
+
+ // Determine the element index into its half vector source.
+ int HalfElt = M % HalfNumElts;
+
+ // We can shuffle with up to 2 half vectors, set the new 'half'
+ // shuffle mask accordingly.
+ if (-1 == HalfIdx1 || HalfIdx1 == HalfIdx) {
+ HalfMask.push_back(HalfElt);
+ HalfIdx1 = HalfIdx;
+ continue;
+ }
+ if (-1 == HalfIdx2 || HalfIdx2 == HalfIdx) {
+ HalfMask.push_back(HalfElt + HalfNumElts);
+ HalfIdx2 = HalfIdx;
+ continue;
+ }
+
+ // Too many half vectors referenced.
+ return SDValue();
+ }
+ assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+
+ auto GetHalfVector = [&](int HalfIdx) {
+ if (HalfIdx < 0)
+ return DAG.getUNDEF(HalfVT);
+ SDValue V = (HalfIdx < 2 ? V1 : V2);
+ HalfIdx = (HalfIdx % 2) * HalfNumElts;
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
+ DAG.getIntPtrConstant(HalfIdx, DL));
+ };
+
+ SDValue Half1 = GetHalfVector(HalfIdx1);
+ SDValue Half2 = GetHalfVector(HalfIdx2);
+ SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
+ DAG.getIntPtrConstant(Offset, DL));
+}
+
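A standalone sketch of the half-classification step above, using plain vectors and hypothetical names; unlike the real lowering it does not restrict the references to the lower halves only:

#include <cassert>
#include <vector>

// Split a 2N-element two-input shuffle mask into references to at most two
// 128-bit halves (0 = lo V1, 1 = hi V1, 2 = lo V2, 3 = hi V2) plus an
// N-element mask over those two halves. Returns false if more than two
// halves are referenced.
static bool buildHalfMask(const std::vector<int> &Mask, int HalfNumElts,
                          int &HalfIdx1, int &HalfIdx2,
                          std::vector<int> &HalfMask) {
  HalfIdx1 = HalfIdx2 = -1;
  HalfMask.clear();
  for (int M : Mask) {
    if (M < 0) { HalfMask.push_back(-1); continue; }
    int HalfIdx = M / HalfNumElts;  // which of the 4 halves
    int HalfElt = M % HalfNumElts;  // index within that half
    if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) {
      HalfIdx1 = HalfIdx;
      HalfMask.push_back(HalfElt);
    } else if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) {
      HalfIdx2 = HalfIdx;
      HalfMask.push_back(HalfElt + HalfNumElts);
    } else {
      return false;                 // too many half vectors referenced
    }
  }
  return true;
}

int main() {
  // Lower half of a v8i32 shuffle pulling from lo(V1) and lo(V2).
  std::vector<int> HalfMask;
  int H1, H2;
  assert(buildHalfMask({0, 9, 1, 8}, 4, H1, H2, HalfMask));
  assert(H1 == 0 && H2 == 2);
  assert(HalfMask[1] == 5 && HalfMask[3] == 4); // 9 -> lo(V2) elt 1, etc.
}
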
/// \brief Test whether the specified input (0 or 1) is in-place blended by the
/// given mask.
///
@@ -9776,16 +10553,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DAG);
}
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
+ return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG))
@@ -9876,14 +10647,9 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
+ return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either input is already in place,
@@ -9941,14 +10707,9 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
+ return V;
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends. We also need to squash the
@@ -9974,9 +10735,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (Subtarget->hasAVX2())
return DAG.getNode(
X86ISD::VPERMV, DL, MVT::v8f32,
- DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL,
- MVT::v8i32, VPermMask)),
- V1);
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1);
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
@@ -10041,14 +10800,9 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V2, V1);
- if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V2, V1);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
+ return V;
}
// Try to use shift instructions.
@@ -10115,18 +10869,9 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane:
- 0, 16, 1, 17, 2, 18, 3, 19,
- // Second 128-bit lane:
- 8, 24, 9, 25, 10, 26, 11, 27}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane:
- 4, 20, 5, 21, 6, 22, 7, 23,
- // Second 128-bit lane:
- 12, 28, 13, 29, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
+ return V;
// Try to use shift instructions.
if (SDValue Shift =
@@ -10215,22 +10960,9 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
- // Note that these are repeated 128-bit lane unpacks, not unpacks across all
- // 256-bit lanes.
- if (isShuffleEquivalent(
- V1, V2, Mask,
- {// First 128-bit lane:
- 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39,
- // Second 128-bit lane:
- 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2);
- if (isShuffleEquivalent(
- V1, V2, Mask,
- {// First 128-bit lane:
- 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47,
- // Second 128-bit lane:
- 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2);
+ if (SDValue V =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
+ return V;
// Try to use shift instructions.
if (SDValue Shift =
@@ -10296,12 +11028,17 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, VT, V1, V2, Mask, Subtarget, DAG))
return Insertion;
- // There is a really nice hard cut-over between AVX1 and AVX2 that means we can
- // check for those subtargets here and avoid much of the subtarget querying in
- // the per-vector-type lowering routines. With AVX1 we have essentially *zero*
- // ability to manipulate a 256-bit vector with integer types. Since we'll use
- // floating point types there eventually, just immediately cast everything to
- // a float and operate entirely in that domain.
+ // Handle special cases where the lower or upper half is UNDEF.
+ if (SDValue V =
+ lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
+ // There is a really nice hard cut-over between AVX1 and AVX2 that means we
+ // can check for those subtargets here and avoid much of the subtarget
+ // querying in the per-vector-type lowering routines. With AVX1 we have
+ // essentially *zero* ability to manipulate a 256-bit vector with integer
+ // types. Since we'll use floating point types there eventually, just
+ // immediately cast everything to a float and operate entirely in that domain.
if (VT.isInteger() && !Subtarget->hasAVX2()) {
int ElementBits = VT.getScalarSizeInBits();
if (ElementBits < 32)
@@ -10334,6 +11071,57 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
}
}
+/// \brief Try to lower a vector shuffle as 128-bit shuffles.
+static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
+ assert(VT.getScalarSizeInBits() == 64 &&
+ "Unexpected element type size for 128bit shuffle.");
+
+ // Handling a 256-bit vector requires VLX, and lowerV2X128VectorShuffle()
+ // is most probably a better solution for that case.
+ assert(VT.is512BitVector() && "Unexpected vector size for 128bit shuffle.");
+
+ SmallVector<int, 4> WidenedMask;
+ if (!canWidenShuffleElements(Mask, WidenedMask))
+ return SDValue();
+
+ // Form a 128-bit permutation.
+ // Convert the 64-bit shuffle mask selection values into 128-bit selection
+ // bits defined by a vshuf64x2 instruction's immediate control byte.
+ unsigned PermMask = 0, Imm = 0;
+ unsigned ControlBitsNum = WidenedMask.size() / 2;
+
+ for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+ if (WidenedMask[i] == SM_SentinelZero)
+ return SDValue();
+
+ // Use first element in place of undef mask.
+ Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
+ PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+ }
+
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, DL, MVT::i8));
+}
+
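The immediate construction for SHUF128 can be modelled on plain ints (hypothetical helper; fixed to the v8i64/v8f64 case, where the widened mask has four 128-bit lanes and two control bits per lane):

#include <cassert>

// Pack four 128-bit lane selectors (already widened from the 64-bit element
// mask) into a vshuf64x2-style immediate; undef lanes (-1) default to 0.
static unsigned buildShuf128Imm(const int WidenedMask[4]) {
  unsigned PermMask = 0, ControlBitsNum = 2; // 4 lanes / 2
  for (int i = 0; i < 4; ++i) {
    int Sel = (WidenedMask[i] < 0) ? 0 : WidenedMask[i];
    PermMask |= (unsigned)(Sel % 4) << (i * ControlBitsNum);
  }
  return PermMask;
}

int main() {
  // 128-bit lane mask <2, 3, 4, 5>: lanes 2,3 of V1 and lanes 0,1 of V2.
  const int WidenedMask[4] = {2, 3, 4, 5};
  assert(buildShuf128Imm(WidenedMask) == 0x4E); // 0b01'00'11'10
}
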
+static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+
+ assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
+
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+
+ SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
+ if (isSingleInputShuffleMask(Mask))
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
+}
+
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
const X86Subtarget *Subtarget,
@@ -10345,21 +11133,21 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+ if (SDValue Shuf128 =
+ lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ return Shuf128;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
+ return Unpck;
+
+ return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
@@ -10367,22 +11155,11 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 0, 16, 1, 17, 4, 20, 5, 21,
- // Second 128-bit lane.
- 8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 2, 18, 3, 19, 6, 22, 7, 23,
- // Second 128-bit lane.
- 10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
+ return Unpck;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG);
+ return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
@@ -10396,21 +11173,21 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- // X86 has dedicated unpack instructions that can handle specific blend
- // operations: UNPCKH and UNPCKL.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2);
+ if (SDValue Shuf128 =
+ lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
+ return Shuf128;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
+ return Unpck;
+
+ return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
@@ -10418,22 +11195,11 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
- // Use dedicated unpack instructions for masks that match their pattern.
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 0, 16, 1, 17, 4, 20, 5, 21,
- // Second 128-bit lane.
- 8, 24, 9, 25, 12, 28, 13, 29}))
- return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask,
- {// First 128-bit lane.
- 2, 18, 3, 19, 6, 22, 7, 23,
- // Second 128-bit lane.
- 10, 26, 11, 27, 14, 30, 15, 31}))
- return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+ if (SDValue Unpck =
+ lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
+ return Unpck;
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+ return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10448,8 +11214,7 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
- // FIXME: Implement direct support for this type!
- return splitAndLowerVectorShuffle(DL, MVT::v32i16, V1, V2, Mask, DAG);
+ return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
@@ -10517,6 +11282,60 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
}
+// Lower vXi1 vector shuffles.
+// There is no dedicated instruction on AVX-512 that shuffles the masks.
+// The only way to shuffle bits is to sign-extend the mask vector to a SIMD
+// vector, shuffle it and then truncate it back.
+static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Subtarget->hasAVX512() &&
+ "Cannot lower 512-bit vectors w/o basic ISA!");
+ MVT ExtVT;
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("Expected a vector of i1 elements");
+ case MVT::v2i1:
+ ExtVT = MVT::v2i64;
+ break;
+ case MVT::v4i1:
+ ExtVT = MVT::v4i32;
+ break;
+ case MVT::v8i1:
+ ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL
+ break;
+ case MVT::v16i1:
+ ExtVT = MVT::v16i32;
+ break;
+ case MVT::v32i1:
+ ExtVT = MVT::v32i16;
+ break;
+ case MVT::v64i1:
+ ExtVT = MVT::v64i8;
+ break;
+ }
+
+ if (ISD::isBuildVectorAllZeros(V1.getNode()))
+ V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+ else if (ISD::isBuildVectorAllOnes(V1.getNode()))
+ V1 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ else
+ V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
+
+ if (V2.isUndef())
+ V2 = DAG.getUNDEF(ExtVT);
+ else if (ISD::isBuildVectorAllZeros(V2.getNode()))
+ V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+ else if (ISD::isBuildVectorAllOnes(V2.getNode()))
+ V2 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ else
+ V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask));
+}
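A minimal bool-vector model of the extend/shuffle/truncate sequence used for vXi1 shuffles (hypothetical helper, no SelectionDAG types):

#include <cassert>
#include <cstdint>
#include <vector>

// Widen each mask bit to a full integer lane (sign-extend), shuffle the
// integer lanes, then truncate back to bits.
static std::vector<bool> shuffleI1(const std::vector<bool> &V1,
                                   const std::vector<bool> &V2,
                                   const std::vector<int> &Mask) {
  int Size = (int)V1.size();
  std::vector<int32_t> E1(Size), E2(Size), Shuffled(Mask.size());
  for (int i = 0; i < Size; ++i) {
    E1[i] = V1[i] ? -1 : 0;          // sign-extend i1 -> i32
    E2[i] = V2[i] ? -1 : 0;
  }
  for (size_t i = 0; i < Mask.size(); ++i) {
    int M = Mask[i];
    Shuffled[i] = (M < 0) ? 0 : (M < Size ? E1[M] : E2[M - Size]);
  }
  std::vector<bool> Out(Mask.size());
  for (size_t i = 0; i < Mask.size(); ++i)
    Out[i] = (Shuffled[i] & 1) != 0; // truncate i32 -> i1
  return Out;
}

int main() {
  std::vector<bool> R = shuffleI1({1, 0, 1, 0}, {0, 0, 1, 1}, {3, 2, 7, 6});
  assert(R[0] == 0 && R[1] == 1 && R[2] == 1 && R[3] == 1);
}
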
/// \brief Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
@@ -10533,8 +11352,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
MVT VT = Op.getSimpleValueType();
int NumElements = VT.getVectorNumElements();
SDLoc dl(Op);
+ bool Is1BitVector = (VT.getVectorElementType() == MVT::i1);
- assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+ assert((VT.getSizeInBits() != 64 || Is1BitVector) &&
+ "Can't lower MMX shuffles");
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
@@ -10572,7 +11393,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
// elements wider than 64 bits, but it might be interesting to form i128
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
SmallVector<int, 16> WidenedMask;
- if (VT.getScalarSizeInBits() < 64 &&
+ if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(Mask, WidenedMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
@@ -10640,17 +11461,17 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
}
// For each vector width, delegate to a specialized lowering routine.
- if (VT.getSizeInBits() == 128)
+ if (VT.is128BitVector())
return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
- if (VT.getSizeInBits() == 256)
+ if (VT.is256BitVector())
return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
- // Force AVX-512 vectors to be scalarized for now.
- // FIXME: Implement AVX-512 support!
- if (VT.getSizeInBits() == 512)
+ if (VT.is512BitVector())
return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+ if (Is1BitVector)
+ return lower1BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
llvm_unreachable("Unimplemented!");
}
@@ -10661,11 +11482,16 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
unsigned &MaskValue) {
MaskValue = 0;
unsigned NumElems = BuildVector->getNumOperands();
+
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+ // We don't handle the >2 lanes case right now.
unsigned NumLanes = (NumElems - 1) / 8 + 1;
+ if (NumLanes > 2)
+ return false;
+
unsigned NumElemsInLane = NumElems / NumLanes;
- // Blend for v16i16 should be symetric for the both lanes.
+ // Blend for v16i16 should be symmetric for the both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
SDValue EltCond = BuildVector->getOperand(i);
SDValue SndLaneEltCond =
@@ -10673,20 +11499,25 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
int Lane1Cond = -1, Lane2Cond = -1;
if (isa<ConstantSDNode>(EltCond))
- Lane1Cond = !isZero(EltCond);
+ Lane1Cond = !isNullConstant(EltCond);
if (isa<ConstantSDNode>(SndLaneEltCond))
- Lane2Cond = !isZero(SndLaneEltCond);
+ Lane2Cond = !isNullConstant(SndLaneEltCond);
+ unsigned LaneMask = 0;
if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
// Lane1Cond != 0, means we want the first argument.
// Lane1Cond == 0, means we want the second argument.
// The encoding of this argument is 0 for the first argument, 1
// for the second. Therefore, invert the condition.
- MaskValue |= !Lane1Cond << i;
+ LaneMask = !Lane1Cond << i;
else if (Lane1Cond < 0)
- MaskValue |= !Lane2Cond << i;
+ LaneMask = !Lane2Cond << i;
else
return false;
+
+ MaskValue |= LaneMask;
+ if (NumLanes == 2)
+ MaskValue |= LaneMask << NumElemsInLane;
}
return true;
}
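A simplified standalone sketch of the blend immediate construction, assuming the two lanes' conditions already agree so only the first lane needs to be inspected (the real code verifies that agreement); helper name is hypothetical:

#include <cassert>

// Build a blend immediate for 1 or 2 lanes from the first lane's per-element
// conditions; a second lane simply mirrors the first lane's bits.
static unsigned buildBlendImm(const int *FirstLaneCond,
                              unsigned NumElemsInLane, unsigned NumLanes) {
  unsigned MaskValue = 0;
  for (unsigned i = 0; i < NumElemsInLane; ++i) {
    // Condition != 0 selects the first source; the immediate encodes the
    // second source as 1, so the bit is the inverted condition.
    unsigned LaneMask = (FirstLaneCond[i] ? 0u : 1u) << i;
    MaskValue |= LaneMask;
    if (NumLanes == 2)
      MaskValue |= LaneMask << NumElemsInLane;
  }
  return MaskValue;
}

int main() {
  const int Cond[8] = {1, 0, 1, 0, 1, 1, 0, 0};
  assert(buildBlendImm(Cond, 8, 2) == 0xCACA); // 0b11001010 in both lanes
}
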
@@ -10711,7 +11542,8 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op,
for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) {
SDValue CondElt = CondBV->getOperand(i);
Mask.push_back(
- isa<ConstantSDNode>(CondElt) ? i + (isZero(CondElt) ? Size : 0) : -1);
+ isa<ConstantSDNode>(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0)
+ : -1);
}
return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask);
}
@@ -10776,9 +11608,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
}
if (VT.getSizeInBits() == 16) {
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
// If Idx is 0, it's cheaper to do a move instead of a pextrw.
- if (Idx == 0)
+ if (isNullConstant(Op.getOperand(1)))
return DAG.getNode(
ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
@@ -10801,8 +11632,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
return SDValue();
SDNode *User = *Op.getNode()->use_begin();
if ((User->getOpcode() != ISD::STORE ||
- (isa<ConstantSDNode>(Op.getOperand(1)) &&
- cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
+ isNullConstant(Op.getOperand(1))) &&
(User->getOpcode() != ISD::BITCAST ||
User->getValueType(0) != MVT::i32))
return SDValue();
@@ -10900,10 +11730,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT EltVT = VecVT.getVectorElementType();
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
+ assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
- //if (IdxVal >= NumElems/2)
- // IdxVal -= NumElems/2;
- IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
+ // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
+ // this can be done with a mask.
+ IdxVal &= ElemsPerChunk - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getConstant(IdxVal, dl, MVT::i32));
}
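The new assert and mask rely on the standard identity i mod N == i & (N - 1) for power-of-two N; a tiny standalone check follows (chunkIndex is a made-up name, not part of the patch).

  #include <cassert>
  #include <cstdint>

  // Reducing an index into a 128-bit chunk: because the element count per
  // chunk is a power of two, the modulo is a single AND.
  static uint32_t chunkIndex(uint32_t IdxVal, uint32_t ElemsPerChunk) {
    assert((ElemsPerChunk & (ElemsPerChunk - 1)) == 0 && "not a power of 2");
    return IdxVal & (ElemsPerChunk - 1);  // == IdxVal % ElemsPerChunk
  }

  int main() {
    // e.g. element 13 of a vector with 4 x i32 per 128-bit chunk: 13 & 3 == 1.
    for (uint32_t i = 0; i != 64; ++i) {
      assert(chunkIndex(i, 4) == i % 4);
      assert(chunkIndex(i, 8) == i % 8);
    }
    return 0;
  }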
@@ -10918,8 +11749,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// TODO: handle v16i8.
if (VT.getSizeInBits() == 16) {
SDValue Vec = Op.getOperand(0);
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (Idx == 0)
+ if (isNullConstant(Op.getOperand(1)))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec),
@@ -10951,8 +11781,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
// FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
// to match extract_elt for f64.
- unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- if (Idx == 0)
+ if (isNullConstant(Op.getOperand(1)))
return Op;
// UNPCKHPD the element to the lowest double word, then movsd.
@@ -11039,7 +11868,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Insert the element into the desired chunk.
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
- unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
+ assert(isPowerOf2_32(NumEltsIn128));
+ // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
+ unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
DAG.getConstant(IdxIn128, dl, MVT::i32));
@@ -11078,8 +11909,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Bits [3:0] of the constant are the zero mask. The DAG Combiner may
// combine either bitwise AND or insert of float 0.0 to set these bits.
- const Function *F = DAG.getMachineFunction().getFunction();
- bool MinSize = F->hasFnAttribute(Attribute::MinSize);
+ bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize();
if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
@@ -11199,14 +12029,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
// --> load32 addr
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.is256BitVector() && SubVecVT.is128BitVector() &&
- !Subtarget->isUnalignedMem32Slow()) {
- SDValue SubVec2 = Vec.getOperand(1);
- if (auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2))) {
- if (Idx2->getZExtValue() == 0) {
- SDValue Ops[] = { SubVec2, SubVec };
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
+ OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through a bitcast to get to the load.
+ if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST)
+ SubVec2 = SubVec2.getOperand(0);
+
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget->getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
+ SDValue Ops[] = { SubVec2, SubVec };
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
}
}
}
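In intrinsics terms, the fold above ("--> load32 addr", now keyed off allowsMemoryAccess rather than a blanket isUnalignedMem32Slow check) merges two adjacent 128-bit loads into one 256-bit load when the target reports the wide access as fast. The sketch below is only an illustration; loadPair and Fast32ByteLoads are invented names, and the code assumes an AVX-enabled build.

  #include <immintrin.h>

  // Two consecutive 16-byte loads feeding an insert of the upper half can be
  // replaced by a single 32-byte load, but only when a 32-byte access at this
  // alignment is reported as fast.
  static __m256 loadPair(const float *P, bool Fast32ByteLoads) {
    if (Fast32ByteLoads)
      return _mm256_loadu_ps(P);                              // single load32
    __m256 V = _mm256_castps128_ps256(_mm_loadu_ps(P));       // low 128 bits
    return _mm256_insertf128_ps(V, _mm_loadu_ps(P + 4), 1);   // high 128 bits
  }

  int main() {
    alignas(32) float Buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    float Out[8];
    _mm256_storeu_ps(Out, loadPair(Buf, true));
    return Out[7] == 7.0f ? 0 : 1;
  }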
@@ -11218,37 +12059,9 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
if (OpVT.is512BitVector() && SubVecVT.is256BitVector())
return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
- if (OpVT.getVectorElementType() == MVT::i1) {
- if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal
- return Op;
- SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
- SDValue Undef = DAG.getUNDEF(OpVT);
- unsigned NumElems = OpVT.getVectorNumElements();
- SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8);
-
- if (IdxVal == OpVT.getVectorNumElements() / 2) {
- // Zero upper bits of the Vec
- Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
-
- SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
- SubVec, ZeroIdx);
- Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
- return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
- }
- if (IdxVal == 0) {
- SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
- SubVec, ZeroIdx);
- // Zero upper bits of the Vec2
- Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits);
- Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits);
- // Zero lower bits of the Vec
- Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
- // Merge them together
- return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2);
- }
- }
+ if (OpVT.getVectorElementType() == MVT::i1)
+ return Insert1BitVector(Op, DAG);
+
return SDValue();
}
@@ -11363,7 +12176,8 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
// load.
if (isGlobalStubReference(OpFlag))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
return Result;
}
@@ -11430,7 +12244,8 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -11587,7 +12402,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
}
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, false, 0);
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -11599,10 +12415,18 @@ SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+
+ // Cygwin uses emutls.
+ // FIXME: It may be EmulatedTLS-generic also for X86-Android.
+ if (Subtarget->isTargetWindowsCygwin())
+ return LowerToTLSEmulatedModel(GA, DAG);
+
const GlobalValue *GV = GA->getGlobal();
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Subtarget->isTargetELF()) {
+ if (DAG.getTarget().Options.EmulatedTLS)
+ return LowerToTLSEmulatedModel(GA, DAG);
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
@@ -11830,10 +12654,10 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
- SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ SDValue Chain = DAG.getStore(
+ DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
+ false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}
@@ -11855,10 +12679,9 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
MachineMemOperand *MMO;
if (FI) {
int SSFI = FI->getIndex();
- MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, ByteSize, ByteSize);
+ MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ MachineMemOperand::MOLoad, ByteSize, ByteSize);
} else {
MMO = cast<LoadSDNode>(StackSlot)->getMemOperand();
StackSlot = StackSlot.getOperand(1);
@@ -11884,16 +12707,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
};
- MachineMemOperand *MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOStore, SSFISize, SSFISize);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ MachineMemOperand::MOStore, SSFISize, SSFISize);
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
Ops, Op.getValueType(), MMO);
- Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, false, 0);
+ Result = DAG.getLoad(
+ Op.getValueType(), DL, Chain, StackSlot,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ false, false, false, 0);
}
return Result;
@@ -11937,16 +12760,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// Load the 64-bit value into an XMM register.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
Op.getOperand(0));
- SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ SDValue CLod0 =
+ DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
SDValue Unpck1 =
getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);
- SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ SDValue CLod1 =
+ DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
SDValue Result;
@@ -11996,10 +12822,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));
// Subtract the bias.
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);
// Handle final rounding.
- EVT DestVT = Op.getValueType();
+ MVT DestVT = Op.getSimpleValueType();
if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
@@ -12025,14 +12852,23 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
// return (float4) lo + fhi;
+ // We shouldn't use it when unsafe-fp-math is enabled though: we might later
+ // reassociate the two FADDs, and if we do that, the algorithm fails
+ // spectacularly (PR24512).
+ // FIXME: If we ever have some kind of Machine FMF, this should be marked
+ // as non-fast and always be enabled. Why isn't SDAG FMF enough? Because
+ // there's also the MachineCombiner reassociations happening on Machine IR.
+ if (DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
SDLoc DL(Op);
SDValue V = Op->getOperand(0);
- EVT VecIntVT = V.getValueType();
+ MVT VecIntVT = V.getSimpleValueType();
bool Is128 = VecIntVT == MVT::v4i32;
- EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
+ MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32;
// If we convert to something else than the supported type, e.g., to v4f64,
// abort early.
- if (VecFloatVT != Op->getValueType(0))
+ if (VecFloatVT != Op->getSimpleValueType(0))
return SDValue();
unsigned NumElts = VecIntVT.getVectorNumElements();
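For readers who want to check the bit trick quoted in the comment above: 0x4b000000 is 2^23 and 0x53000000 is 2^39 in IEEE single precision, so ORing the low and high 16-bit halves of the input into those mantissas and subtracting the combined bias reconstructs the unsigned value exactly. A scalar C++ reference follows (asFloat and uint32ToFloat are made-up helpers); it must be compiled without fast-math and with IEEE single-precision evaluation, which is precisely the reassociation hazard the UnsafeFPMath bail-out above avoids.

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  static float asFloat(uint32_t Bits) {
    float F;
    std::memcpy(&F, &Bits, sizeof(F));
    return F;
  }

  // Scalar model of the vector lowering: low16 lands in the mantissa of 2^23,
  // high16 in the mantissa of 2^39 (scaled by 2^16), and the final FADD is the
  // correctly rounded uint32 -> float conversion.
  static float uint32ToFloat(uint32_t V) {
    float Lo  = asFloat((V & 0xFFFFu) | 0x4B000000u);  // 2^23 + low16
    float Hi  = asFloat((V >> 16)     | 0x53000000u);  // 2^39 + high16 * 2^16
    float FHi = Hi - (0x1.0p39f + 0x1.0p23f);          // keep un-reassociated
    return Lo + FHi;
  }

  int main() {
    for (uint32_t V : {0u, 1u, 0xFFFFu, 0x10000u, 0x80000000u, 0xFFFFFFFFu})
      assert(uint32ToFloat(V) == static_cast<float>(V));
    return 0;
  }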
@@ -12070,7 +12906,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
SDValue Low, High;
if (Subtarget.hasSSE41()) {
- EVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
+ MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16;
// uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa);
SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow);
SDValue VecBitcast = DAG.getBitcast(VecI16VT, V);
@@ -12108,6 +12944,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue FHigh =
DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd);
// return (float4) lo + fhi;
@@ -12137,11 +12974,10 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget);
case MVT::v16i8:
case MVT::v16i16:
- if (Subtarget->hasAVX512())
- return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
+ assert(Subtarget->hasAVX512());
+ return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0));
}
- llvm_unreachable(nullptr);
}
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
@@ -12150,7 +12986,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (Op.getValueType().isVector())
+ if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
@@ -12161,6 +12997,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
+
+ if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
+ (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) {
+ // Conversions from unsigned i32 to f32/f64 are legal,
+ // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode.
+ return Op;
+ }
+
if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
@@ -12193,10 +13037,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// we must be careful to do the computation in x87 extended precision, not
// in SSE. (The generic code can't know it's OK to do this, or how to.)
int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
- MachineMemOperand *MMO =
- DAG.getMachineFunction()
- .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, 8, 8);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
+ MachineMemOperand::MOLoad, 8, 8);
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
@@ -12223,24 +13066,52 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
- FudgePtr, MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, false, 4);
+ SDValue Fudge = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
+ false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));
}
+// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
+// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
+// just return an <SDValue(), SDValue()> pair.
+// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
+// to i16, i32 or i64, and we lower it to a legal sequence.
+// If lowered to the final integer result we return a <result, SDValue()> pair.
+// Otherwise we lower it to a sequence ending with a FIST, return a
+// <FIST, StackSlot> pair, and the caller is responsible for loading
+// the final integer result from StackSlot.
std::pair<SDValue,SDValue>
-X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool IsSigned, bool IsReplace) const {
+X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+ bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
+ EVT TheVT = Op.getOperand(0).getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
+ if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
+ // f16 must be promoted before using the lowering in this routine.
+ // fp128 does not use this lowering.
+ return std::make_pair(SDValue(), SDValue());
+ }
+
+ // If using FIST to compute an unsigned i64, we'll need some fixup
+ // to handle values above the maximum signed i64. A FIST is always
+ // used for the 32-bit subtarget, but also for f80 on a 64-bit target.
+ bool UnsignedFixup = !IsSigned &&
+ DstTy == MVT::i64 &&
+ (!Subtarget->is64Bit() ||
+ !isScalarFPTypeInSSEReg(TheVT));
+
+ if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
+ // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
+ // The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
@@ -12258,42 +13129,87 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
- // We lower FP->int64 either into FISTP64 followed by a load from a temporary
- // stack slot, or into the FTOL runtime function.
+ // We lower FP->int64 into FISTP64 followed by a load from a temporary
+ // stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
- if (!IsSigned && isIntegerTypeFTOL(DstTy))
- Opc = X86ISD::WIN_FTOL;
- else
- switch (DstTy.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
- }
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
- EVT TheVT = Op.getOperand(0).getValueType();
+ SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
+
+ if (UnsignedFixup) {
+ //
+ // Conversion to unsigned i64 is implemented with a select,
+ // depending on whether the source value fits in the range
+ // of a signed i64. Let Thresh be the FP equivalent of
+ // 0x8000000000000000ULL.
+ //
+ // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
+ // FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
+ // Fist-to-mem64 FistSrc
+ // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
+ // to XOR'ing the high 32 bits with Adjust.
+ //
+ // Being a power of 2, Thresh is exactly representable in all FP formats.
+ // For X87 we'd like to use the smallest FP type for this constant, but
+ // for DAG type consistency we have to match the FP operand type.
+
+ APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000));
+ LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
+ bool LosesInfo = false;
+ if (TheVT == MVT::f64)
+ // The rounding mode is irrelevant as the conversion should be exact.
+ Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ &LosesInfo);
+ else if (TheVT == MVT::f80)
+ Status = Thresh.convert(APFloat::x87DoubleExtended,
+ APFloat::rmNearestTiesToEven, &LosesInfo);
+
+ assert(Status == APFloat::opOK && !LosesInfo &&
+ "FP conversion should have been exact");
+
+ SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);
+
+ SDValue Cmp = DAG.getSetCC(DL,
+ getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT),
+ Value, ThreshVal, ISD::SETLT);
+ Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0x80000000, DL, MVT::i32));
+ SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
+ Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), TheVT),
+ Value, ThreshVal, ISD::SETLT);
+ Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
+ }
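A scalar model of the select/threshold fixup above may help (fpToUint64 is an invented name, not DAG code; it assumes 0 <= Value < 2^64 and the truncating semantics that FIST provides). The 0x5f000000 APInt used for Thresh is simply 2^63 encoded as an IEEE single: sign 0, biased exponent 0xBE = 127 + 63, zero mantissa.

  #include <cassert>
  #include <cstdint>

  // Values below 2^63 convert directly; values at or above it are biased down
  // by 2^63 before the signed conversion, and the sign bit is folded back in
  // afterwards -- XORing the high 32 bits with Adjust, exactly as above.
  static uint64_t fpToUint64(double Value) {
    const double Thresh = 0x1p63;          // 2^63, exact in every FP format used
    uint32_t Adjust = (Value < Thresh) ? 0u : 0x80000000u;
    double FistSrc  = (Value < Thresh) ? Value : Value - Thresh;
    uint64_t Result = static_cast<uint64_t>(static_cast<int64_t>(FistSrc));
    return Result ^ (static_cast<uint64_t>(Adjust) << 32);
  }

  int main() {
    assert(fpToUint64(0.0) == 0u);
    assert(fpToUint64(12345.0) == 12345u);
    assert(fpToUint64(0x1p63) == (1ull << 63));
    assert(fpToUint64(0x1p63 + 4096.0) == (1ull << 63) + 4096u);
    return 0;
  }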
+
// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
- MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, SSFI), false,
+ false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
SDValue Ops[] = {
Chain, StackSlot, DAG.getValueType(TheVT)
};
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOLoad, MemSize, MemSize);
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
+ MachineMemOperand::MOLoad, MemSize, MemSize);
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
@@ -12301,28 +13217,52 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
}
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOStore, MemSize, MemSize);
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
+ MachineMemOperand::MOStore, MemSize, MemSize);
+
+ if (UnsignedFixup) {
+
+ // Insert the FIST, load its result as two i32's,
+ // and XOR the high i32 with Adjust.
+
+ SDValue FistOps[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ FistOps, DstTy, MMO);
+
+ SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot,
+ DAG.getConstant(4, DL, PtrVT));
- if (Opc != X86ISD::WIN_FTOL) {
+ SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);
+
+ if (Subtarget->is64Bit()) {
+ // Join High32 and Low32 into a 64-bit result.
+ // (High32 << 32) | Low32
+ Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
+ High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
+ High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
+ DAG.getConstant(32, DL, MVT::i8));
+ SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
+ return std::make_pair(Result, SDValue());
+ }
+
+ SDValue ResultOps[] = { Low32, High32 };
+
+ SDValue pair = IsReplace
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
+ : DAG.getMergeValues(ResultOps, DL);
+ return std::make_pair(pair, SDValue());
+ } else {
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
- } else {
- SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
- DAG.getVTList(MVT::Other, MVT::Glue),
- Chain, Value);
- SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
- MVT::i32, ftol.getValue(1));
- SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
- MVT::i32, eax.getValue(2));
- SDValue Ops[] = { eax, edx };
- SDValue pair = IsReplace
- ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
- : DAG.getMergeValues(Ops, DL);
- return std::make_pair(pair, SDValue());
}
}
@@ -12333,7 +13273,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
MVT InVT = In.getSimpleValueType();
SDLoc dl(Op);
- if (VT.is512BitVector() || InVT.getScalarType() == MVT::i1)
+ if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In);
// Optimize vectors in AVX mode:
@@ -12426,6 +13366,62 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
return SDValue();
}
+static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue In = Op.getOperand(0);
+ MVT InVT = In.getSimpleValueType();
+
+ assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");
+
+ // Shift LSB to MSB and use VPMOVB2M - SKX.
+ unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
+ if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
+ Subtarget->hasBWI()) || // legal, will go to VPMOVB2M, VPMOVW2M
+ ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+ InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() &&
+ Subtarget->hasVLX())) { // legal, will go to VPMOVB2M, VPMOVW2M
+ // Shift packed bytes not supported natively, bitcast to word
+ MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
+ DAG.getBitcast(ExtVT, In),
+ DAG.getConstant(ShiftInx, DL, ExtVT));
+ ShiftNode = DAG.getBitcast(InVT, ShiftNode);
+ return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+ }
+ if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
+ Subtarget->hasDQI()) || // legal, will go to VPMOVD2M, VPMOVQ2M
+ ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+ InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() &&
+ Subtarget->hasVLX())) { // legal, will go to VPMOVD2M, VPMOVQ2M
+
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+ DAG.getConstant(ShiftInx, DL, InVT));
+ return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+ }
+
+ // Shift LSB to MSB, extend if necessary and use TESTM.
+ unsigned NumElts = InVT.getVectorNumElements();
+ if (InVT.getSizeInBits() < 512 &&
+ (InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 ||
+ !Subtarget->hasVLX())) {
+ assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type.");
+
+ // TESTD/Q should be used (if BW supported we use CVT2MASK above),
+ // so vector should be extended to packed dword/qword.
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+ In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+ InVT = ExtVT;
+ ShiftInx = InVT.getScalarSizeInBits() - 1;
+ }
+
+ SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+ DAG.getConstant(ShiftInx, DL, InVT));
+ return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
+}
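Per element, both strategies above reduce to the same observation: truncation to i1 keeps the least significant bit, and a left shift moves that bit into the MSB, where VPMOV*2M reads it as the sign bit and TESTM sees a nonzero value. A scalar sketch for 16-bit elements follows (viaSignBit and viaTest are made-up names).

  #include <cassert>
  #include <cstdint>

  // CVT2MASK path: after SHL by 15 the MSB of the word is the original LSB.
  static bool viaSignBit(uint16_t X) {
    uint16_t Shifted = static_cast<uint16_t>(X << 15);
    return (Shifted & 0x8000u) != 0;
  }

  // TESTM path: the shifted value ANDed with itself is nonzero exactly when
  // the original LSB was set.
  static bool viaTest(uint16_t X) {
    uint16_t Shifted = static_cast<uint16_t>(X << 15);
    return (Shifted & Shifted) != 0;
  }

  int main() {
    for (uint32_t X = 0; X != 0x10000; ++X) {
      bool Expected = (X & 1) != 0;        // the i1 truncation result
      assert(viaSignBit(static_cast<uint16_t>(X)) == Expected);
      assert(viaTest(static_cast<uint16_t>(X)) == Expected);
    }
    return 0;
  }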
+
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
@@ -12443,42 +13439,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
- // move vector to mask - truncate solution for SKX
- if (VT.getVectorElementType() == MVT::i1) {
- if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
- Subtarget->hasBWI())
- return Op; // legal, will go to VPMOVB2M, VPMOVW2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
- && InVT.getScalarSizeInBits() <= 16 &&
- Subtarget->hasBWI() && Subtarget->hasVLX())
- return Op; // legal, will go to VPMOVB2M, VPMOVW2M
- if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
- Subtarget->hasDQI())
- return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
- if ((InVT.is256BitVector() || InVT.is128BitVector())
- && InVT.getScalarSizeInBits() >= 32 &&
- Subtarget->hasDQI() && Subtarget->hasVLX())
- return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
- }
- if (InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) {
- if (VT.getVectorElementType().getSizeInBits() >=8)
- return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
-
- assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
- unsigned NumElts = InVT.getVectorNumElements();
- assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type");
- if (InVT.getSizeInBits() < 512) {
- MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64;
- In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
- InVT = ExtVT;
- }
-
- SDValue OneV =
- DAG.getConstant(APInt::getSignBit(InVT.getScalarSizeInBits()), DL, InVT);
- SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In);
- return DAG.getNode(X86ISD::TESTM, DL, VT, And, And);
- }
+ if (VT.getVectorElementType() == MVT::i1)
+ return LowerTruncateVecI1(Op, DAG, Subtarget);
+ // vpmovqb/w/d, vpmovdb/w, vpmovwb
+ if (Subtarget->hasAVX512()) {
+ // word to byte only under BWI
+ if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT,
+ DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
+ return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
+ }
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget->hasInt256()) {
@@ -12583,7 +13554,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
/*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
- if (!FIST.getNode()) return Op;
+ if (!FIST.getNode())
+ return Op;
if (StackSlot.getNode())
// Load the result.
@@ -12600,7 +13572,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
- assert(FIST.getNode() && "Unexpected failure");
+ // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
+ if (!FIST.getNode())
+ return Op;
if (StackSlot.getNode())
// Load the result.
@@ -12643,6 +13617,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ bool IsF128 = (VT == MVT::f128);
+
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
@@ -12650,11 +13626,16 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT LogicVT;
MVT EltVT;
unsigned NumElts;
-
+
if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
+ } else if (IsF128) {
+ // SSE instructions are used for optimized f128 logical operations.
+ LogicVT = MVT::f128;
+ EltVT = VT;
+ NumElts = 1;
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
@@ -12675,9 +13656,10 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue Mask = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, Alignment);
+ SDValue Mask =
+ DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, Alignment);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
@@ -12685,7 +13667,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
- if (VT.isVector())
+ if (VT.isVector() || IsF128)
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
// For the scalar case extend to a 128-bit vector, perform the logic op,
@@ -12704,6 +13686,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op1.getSimpleValueType();
+ bool IsF128 = (VT == MVT::f128);
// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -12718,13 +13701,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
+ assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) &&
+ "Unexpected type in LowerFCOPYSIGN");
const fltSemantics &Sem =
- VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle;
+ VT == MVT::f64 ? APFloat::IEEEdouble :
+ (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
const unsigned SizeInBits = VT.getSizeInBits();
SmallVector<Constant *, 4> CV(
- VT == MVT::f64 ? 2 : 4,
+ VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4),
ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
// First, clear all bits but the sign bit from the second operand (sign).
@@ -12737,11 +13723,13 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// Perform all logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE. This allows load folding of the
// constants into the logic instructions.
- MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
- SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
- Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
+ MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
+ SDValue Mask1 =
+ DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
+ if (!IsF128)
+ Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);
// Next, clear the sign bit from the first operand (magnitude).
@@ -12750,8 +13738,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
APFloat APF = Op0CN->getValueAPF();
// If the magnitude is a positive zero, the sign bit alone is enough.
if (APF.isPosZero())
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
- DAG.getIntPtrConstant(0, dl));
+ return IsF128 ? SignBit :
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
+ DAG.getIntPtrConstant(0, dl));
APF.clearSign();
CV[0] = ConstantFP::get(*Context, APF);
} else {
@@ -12761,18 +13750,21 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, PtrVT, 16);
- SDValue Val = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, false, 16);
+ SDValue Val =
+ DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ false, false, false, 16);
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (!isa<ConstantFPSDNode>(Op0)) {
- Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
+ if (!IsF128)
+ Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
}
// OR the magnitude value with the sign bit.
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
- DAG.getIntPtrConstant(0, dl));
+ return IsF128 ? Val :
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
+ DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -12859,7 +13851,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
return SDValue();
}
- EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
+ MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
// Cast all vectors into TestVT for PTEST.
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
@@ -12999,14 +13991,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) {
+ if (C->isOne() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) {
+ if (C->isAllOnesValue() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
@@ -13135,13 +14127,11 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
SDLoc dl, SelectionDAG &DAG) const {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) {
- if (C->getAPIntValue() == 0)
- return EmitTest(Op0, X86CC, dl, DAG);
+ if (isNullConstant(Op1))
+ return EmitTest(Op0, X86CC, dl, DAG);
- if (Op0.getValueType() == MVT::i1)
- llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
- }
+ assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
+ "Unexpected comparison operation for MVT::i1 operands");
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
@@ -13150,8 +14140,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// if we're optimizing for size, however, as that'll allow better folding
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
- !DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::MinSize) &&
+ !DAG.getMachineFunction().getFunction()->optForMinSize() &&
!Subtarget->isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
@@ -13188,6 +14177,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
+
+ // Some 64-bit targets lack SAHF support, but they do support FCOMI.
+ assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
@@ -13261,13 +14253,8 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
/// original divisions.
-bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
- return NumUsers > 1;
-}
-
-static bool isAllOnes(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isAllOnesValue();
+unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
+ return 2;
}
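The cost argument behind the changed hook can be made concrete: with one divisor feeding n divisions, the reciprocal rewrite costs 1 division plus n multiplies instead of n divisions, so it only pays off for n >= 2, which is the value now returned. A rough fast-math-only sketch (rewritten is an invented name; a/c and a*(1/c) can differ by one rounding step):

  #include <cstdio>

  // One division up front, then a multiply per former division.
  static void rewritten(const double *A, double *Out, int N, double C) {
    double Recip = 1.0 / C;
    for (int I = 0; I != N; ++I)
      Out[I] = A[I] * Recip;     // was A[I] / C
  }

  int main() {
    double A[3] = {1.0, 2.0, 3.0}, Out[3];
    rewritten(A, Out, 3, 4.0);
    std::printf("%g %g %g\n", Out[0], Out[1], Out[2]);  // 0.25 0.5 0.75
    return 0;
  }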
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
@@ -13285,8 +14272,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
if (Op1.getOpcode() == ISD::SHL)
std::swap(Op0, Op1);
if (Op0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
- if (And00C->getZExtValue() == 1) {
+ if (isOneConstant(Op0.getOperand(0))) {
// If we looked past a truncate, check that it's only truncating away
// known zeros.
unsigned BitWidth = Op0.getValueSizeInBits();
@@ -13423,7 +14409,7 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- assert(Op0.getValueType().getVectorElementType() == MVT::i1 &&
+ assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Unexpected type for boolean compare operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0,
@@ -13467,8 +14453,8 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
- assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 &&
- Op.getValueType().getScalarType() == MVT::i1 &&
+ assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 &&
+ Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
@@ -13515,7 +14501,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG)
for (unsigned i = 0; i < n; ++i) {
ConstantSDNode *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
- if (!Elt || Elt->isOpaque() || Elt->getValueType(0) != EVT)
+ if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT)
return SDValue();
// Avoid underflow.
@@ -13606,13 +14592,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (VT.is256BitVector() && !Subtarget->hasInt256())
return Lower256IntVSETCC(Op, DAG);
- EVT OpVT = Op1.getValueType();
+ MVT OpVT = Op1.getSimpleValueType();
if (OpVT.getVectorElementType() == MVT::i1)
return LowerBoolVSETCC_AVX512(Op, DAG);
bool MaskResult = (VT.getVectorElementType() == MVT::i1);
if (Subtarget->hasAVX512()) {
- if (Op1.getValueType().is512BitVector() ||
+ if (Op1.getSimpleValueType().is512BitVector() ||
(Subtarget->hasBWI() && Subtarget->hasVLX()) ||
(MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32))
return LowerIntVSETCC_AVX512(Op, DAG, Subtarget);
@@ -13628,6 +14614,33 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC));
}
+ // Lower using XOP integer comparisons.
+ if ((VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) {
+ // Translate compare code to XOP PCOM compare mode.
+ unsigned CmpMode = 0;
+ switch (SetCCOpcode) {
+ default: llvm_unreachable("Unexpected SETCC condition");
+ case ISD::SETULT:
+ case ISD::SETLT: CmpMode = 0x00; break;
+ case ISD::SETULE:
+ case ISD::SETLE: CmpMode = 0x01; break;
+ case ISD::SETUGT:
+ case ISD::SETGT: CmpMode = 0x02; break;
+ case ISD::SETUGE:
+ case ISD::SETGE: CmpMode = 0x03; break;
+ case ISD::SETEQ: CmpMode = 0x04; break;
+ case ISD::SETNE: CmpMode = 0x05; break;
+ }
+
+ // Are we comparing unsigned or signed integers?
+ unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode)
+ ? X86ISD::VPCOMU : X86ISD::VPCOM;
+
+ return DAG.getNode(Opc, dl, VT, Op0, Op1,
+ DAG.getConstant(CmpMode, dl, MVT::i8));
+ }
+
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
@@ -13777,7 +14790,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations.
if (FlipSigns) {
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl,
VT);
Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB);
@@ -13818,11 +14831,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->isNullValue() &&
+ isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
- if (NewSetCC.getNode()) {
+ if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC);
return NewSetCC;
@@ -13831,17 +14842,14 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// If the input is a setcc, then reuse the input setcc or use a new one with
// the inverted condition.
if (Op0.getOpcode() == X86ISD::SETCC) {
X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- bool Invert = (CC == ISD::SETNE) ^
- cast<ConstantSDNode>(Op1)->isNullValue();
+ bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
if (!Invert)
return Op0;
@@ -13854,8 +14862,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SetCC;
}
}
- if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1) &&
+ if ((Op0.getValueType() == MVT::i1) && isOneConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true);
@@ -13876,6 +14883,23 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return SetCC;
}
+SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue Carry = Op.getOperand(2);
+ SDValue Cond = Op.getOperand(3);
+ SDLoc DL(Op);
+
+ assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
+ X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());
+
+ assert(Carry.getOpcode() != ISD::CARRY_FALSE);
+ SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+ SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry);
+ return DAG.getNode(X86ISD::SETCC, DL, Op.getValueType(),
+ DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1));
+}
+
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
unsigned Opc = Op.getNode()->getOpcode();
@@ -13918,7 +14942,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
SDLoc DL(Op);
- EVT VT = Op1.getValueType();
+ MVT VT = Op1.getSimpleValueType();
SDValue CC;
// Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops
@@ -13927,7 +14951,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (Cond.getOpcode() == ISD::SETCC &&
((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) ||
(Subtarget->hasSSE1() && VT == MVT::f32)) &&
- VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) {
+ VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) {
SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1);
int SSECC = translateX86FSETCC(
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
@@ -13961,12 +14985,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// Convert to vectors, do a VSELECT, and convert back to scalar.
// All of the conversions should be optimized away.
- EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
+ MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);
SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp);
- EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
+ MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64;
VCmp = DAG.getBitcast(VCmpVT, VCmp);
SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2);
@@ -13980,26 +15004,26 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (VT.isVector() && VT.getScalarType() == MVT::i1) {
- SDValue Op1Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
- Op1Scalar = ConvertI1VectorToInterger(Op1, DAG);
- else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
- Op1Scalar = Op1.getOperand(0);
- SDValue Op2Scalar;
- if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
- Op2Scalar = ConvertI1VectorToInterger(Op2, DAG);
- else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
- Op2Scalar = Op2.getOperand(0);
- if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
- SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
- Op1Scalar.getValueType(),
- Cond, Op1Scalar, Op2Scalar);
- if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
- return DAG.getBitcast(VT, newSelect);
- SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
- DAG.getIntPtrConstant(0, DL));
+ if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
+ SDValue Op1Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
+ Op1Scalar = ConvertI1VectorToInteger(Op1, DAG);
+ else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
+ Op1Scalar = Op1.getOperand(0);
+ SDValue Op2Scalar;
+ if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
+ Op2Scalar = ConvertI1VectorToInteger(Op2, DAG);
+ else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
+ Op2Scalar = Op2.getOperand(0);
+ if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
+ SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
+ Op1Scalar.getValueType(),
+ Cond, Op1Scalar, Op2Scalar);
+ if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
+ return DAG.getBitcast(VT, newSelect);
+ SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
+ DAG.getIntPtrConstant(0, DL));
}
}
@@ -14026,22 +15050,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
- isZero(Cond.getOperand(1).getOperand(1))) {
+ isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
- if ((isAllOnes(Op1) || isAllOnes(Op2)) &&
+ if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
- SDValue Y = isAllOnes(Op2) ? Op1 : Op2;
+ SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2;
SDValue CmpOp0 = Cmp.getOperand(0);
// Apply further optimizations for special cases
// (select (x != 0), -1, 0) -> neg & sbb
// (select (x == 0), 0, -1) -> neg & sbb
- if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y))
- if (YC->isNullValue() &&
- (isAllOnes(Op1) == (CondCode == X86::COND_NE))) {
+ if (isNullConstant(Y) &&
+ (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs,
DAG.getConstant(0, DL,
@@ -14061,11 +15084,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
- if (isAllOnes(Op1) != (CondCode == X86::COND_E))
+ if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
- if (!N2C || !N2C->isNullValue())
+ if (!isNullConstant(Op2))
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
}
@@ -14073,11 +15095,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// Look past (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
- Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
- Cond = Cond.getOperand(0);
- }
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
+ isOneConstant(Cond.getOperand(1)))
+ Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
@@ -14136,15 +15156,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- // Look pass the truncate if the high bits are known zero.
+ // Look past the truncate if the high bits are known zero.
if (isTruncWithZeroHighBitsInput(Cond, DAG))
- Cond = Cond.getOperand(0);
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG);
- if (NewSetCC.getNode()) {
+ if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
@@ -14166,11 +15185,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();
if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
- (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) {
+ (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
+ (isNullConstant(Op1) || isNullConstant(Op2))) {
SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
DAG.getConstant(X86::COND_B, DL, MVT::i8),
Cond);
- if (isAllOnes(Op1) != (CondCode == X86::COND_B))
+ if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B))
return DAG.getNOT(DL, Res, Res.getValueType());
return Res;
}
@@ -14256,8 +15276,8 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
MVT InVT = In.getSimpleValueType();
assert(VT.getSizeInBits() == InVT.getSizeInBits());
- MVT InSVT = InVT.getScalarType();
- assert(VT.getScalarType().getScalarSizeInBits() > InSVT.getScalarSizeInBits());
+ MVT InSVT = InVT.getVectorElementType();
+ assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits());
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
@@ -14276,7 +15296,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
// As SRAI is only available on i16/i32 types, we expand only up to i32
// and handle i64 separately.
- while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) {
+ while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) {
Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr);
MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2);
CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2);
@@ -14286,7 +15306,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
SDValue SignExt = Curr;
if (CurrVT != InVT) {
unsigned SignExtShift =
- CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits();
+ CurrVT.getVectorElementType().getSizeInBits() - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
@@ -14346,7 +15366,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]);
- MVT HalfVT = MVT::getVectorVT(VT.getScalarType(),
+ MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements()/2);
OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
@@ -14470,7 +15490,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
// memory. In practice, we ''widen'' MemVT.
EVT WideVecVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
- loadRegZize / MemVT.getScalarType().getSizeInBits());
+ loadRegZize / MemVT.getScalarSizeInBits());
assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() &&
"Invalid vector type");
@@ -14518,29 +15538,12 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
return Sext;
}
- // Otherwise we'll shuffle the small elements in the high bits of the
- // larger type and perform an arithmetic shift. If the shift is not legal
- // it's better to scalarize.
- assert(TLI.isOperationLegalOrCustom(ISD::SRA, RegVT) &&
- "We can't implement a sext load without an arithmetic right shift!");
-
- // Redistribute the loaded elements into the different locations.
- SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i != NumElems; ++i)
- ShuffleVec[i * SizeRatio + SizeRatio - 1] = i;
-
- SDValue Shuff = DAG.getVectorShuffle(
- WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
-
- Shuff = DAG.getBitcast(RegVT, Shuff);
-
- // Build the arithmetic shift.
- unsigned Amt = RegVT.getVectorElementType().getSizeInBits() -
- MemVT.getVectorElementType().getSizeInBits();
- Shuff =
- DAG.getNode(ISD::SRA, dl, RegVT, Shuff,
- DAG.getConstant(Amt, dl, RegVT));
+ // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest
+ // lanes.
+ assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) &&
+ "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!");
+ SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Shuff;
}
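
The dropped fallback sign-extended the loaded elements by shuffling them into the high bits of the wider lanes and shifting back down arithmetically; the new code hands that job to SIGN_EXTEND_VECTOR_INREG. The scalar identity behind the old expansion, as an illustrative sketch (assumes the usual arithmetic right shift of signed values, as on x86 compilers):

#include <cassert>
#include <cstdint>

// Place the narrow value in the high bits of the wide type, then use an
// arithmetic right shift to replicate the sign bit back down.
static int32_t sextViaShift(int8_t v) {
  uint32_t widened = static_cast<uint32_t>(static_cast<uint8_t>(v)) << 24;
  return static_cast<int32_t>(widened) >> 24;
}

int main() {
  assert(sextViaShift(-5) == -5);
  assert(sextViaShift(100) == 100);
  return 0;
}
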
@@ -14577,11 +15580,9 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
static bool isXor1OfSetCC(SDValue Op) {
if (Op.getOpcode() != ISD::XOR)
return false;
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (N1C && N1C->getAPIntValue() == 1) {
+ if (isOneConstant(Op.getOperand(1)))
return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
- Op.getOperand(0).hasOneUse();
- }
+ Op.getOperand(0).hasOneUse();
return false;
}
@@ -14597,8 +15598,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (Cond.getOpcode() == ISD::SETCC) {
// Check for setcc([su]{add,sub,mul}o == 0).
if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
- isa<ConstantSDNode>(Cond.getOperand(1)) &&
- cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
+ isNullConstant(Cond.getOperand(1)) &&
Cond.getOperand(0).getResNo() == 1 &&
(Cond.getOperand(0).getOpcode() == ISD::SADDO ||
Cond.getOperand(0).getOpcode() == ISD::UADDO ||
@@ -14625,11 +15625,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// Look pass (and (setcc_carry (cmp ...)), 1).
if (Cond.getOpcode() == ISD::AND &&
- Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (C && C->getAPIntValue() == 1)
- Cond = Cond.getOperand(0);
- }
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY &&
+ isOneConstant(Cond.getOperand(1)))
+ Cond = Cond.getOperand(0);
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
@@ -14673,16 +15671,14 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
switch (CondOpcode) {
case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
case ISD::SADDO:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
X86Opcode = X86ISD::INC; X86Cond = X86::COND_O;
break;
}
X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
case ISD::SSUBO:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O;
break;
}
@@ -14844,8 +15840,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
- SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
- if (NewSetCC.getNode()) {
+ if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
addTest = false;
@@ -14877,54 +15872,40 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SplitStack;
SDLoc dl(Op);
+ // Get the inputs.
+ SDNode *Node = Op.getNode();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ EVT VT = Node->getValueType(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
+
+ bool Is64Bit = Subtarget->is64Bit();
+ MVT SPTy = getPointerTy(DAG.getDataLayout());
+
+ SDValue Result;
if (!Lower) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDNode* Node = Op.getNode();
-
unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
- " not tell us which reg is the stack pointer!");
+ " not tell us which reg is the stack pointer!");
EVT VT = Node->getValueType(0);
- SDValue Tmp1 = SDValue(Node, 0);
- SDValue Tmp2 = SDValue(Node, 1);
SDValue Tmp3 = Node->getOperand(2);
- SDValue Chain = Tmp1.getOperand(0);
-
- // Chain the dynamic stack allocation so that it doesn't modify the stack
- // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true),
- SDLoc(Node));
- SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
- Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
- DAG.getConstant(-(uint64_t)Align, dl, VT));
- Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
-
- Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
- DAG.getIntPtrConstant(0, dl, true), SDValue(),
- SDLoc(Node));
-
- SDValue Ops[2] = { Tmp1, Tmp2 };
- return DAG.getMergeValues(Ops, dl);
- }
-
- // Get the inputs.
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- EVT VT = Op.getNode()->getValueType(0);
-
- bool Is64Bit = Subtarget->is64Bit();
- MVT SPTy = getPointerTy(DAG.getDataLayout());
-
- if (SplitStack) {
+ Result = DAG.getNode(ISD::AND, dl, VT, Result,
+ DAG.getConstant(-(uint64_t)Align, dl, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
+ } else if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
@@ -14942,10 +15923,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
- SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
+ Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
- SDValue Ops1[2] = { Value, Chain };
- return DAG.getMergeValues(Ops1, dl);
} else {
SDValue Flag;
const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
@@ -14967,9 +15946,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
}
- SDValue Ops1[2] = { SP, Chain };
- return DAG.getMergeValues(Ops1, dl);
+ Result = SP;
}
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+ DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
+
+ SDValue Ops[2] = {Result, Chain};
+ return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
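
The refactored lowering now funnels all of its paths through a single CALLSEQ_START/CALLSEQ_END pair and a shared Result/Chain merge. The over-aligned case still uses the usual round-down: subtract the size, then AND with the negated alignment. A small standalone sketch of that arithmetic (illustrative only):

#include <cassert>
#include <cstdint>

// result = (sp - size) & -align rounds the new stack pointer down to an
// align-byte boundary, matching the ISD::SUB + ISD::AND pair above.
static uint64_t allocaAddr(uint64_t sp, uint64_t size, uint64_t align) {
  uint64_t result = sp - size;
  result &= ~(align - 1); // equivalent to result & -(uint64_t)align
  return result;
}

int main() {
  uint64_t addr = allocaAddr(0x7fff0000u, 100, 64);
  assert(addr % 64 == 0);
  assert(addr <= 0x7fff0000u - 100);
  return 0;
}
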
@@ -14980,7 +15964,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SDLoc DL(Op);
- if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
+ if (!Subtarget->is64Bit() ||
+ Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -15019,10 +16004,11 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
+ Subtarget->isTarget64BitLP64() ? 8 : 4, DL));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
- Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
- MachinePointerInfo(SV, 16), false, false, 0);
+ Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo(
+ SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
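
The 8/16 constants are now chosen per pointer size, because on an ILP32 x86-64 target the two trailing pointers of the va_list record shrink to 4 bytes each. A hypothetical C mirror of that record, checking the LP64 offsets the store above relies on (VaListTag is an illustrative name, not an LLVM type):

#include <cassert>
#include <cstddef>
#include <cstdint>

// SysV x86-64 va_list element: two 32-bit offsets followed by two pointers.
// On LP64 the pointers land at offsets 8 and 16; with 4-byte pointers (x32)
// they land at 8 and 12, which is what the 8/4 and 16/12 selections encode.
struct VaListTag {
  uint32_t gp_offset;
  uint32_t fp_offset;
  void *overflow_arg_area;
  void *reg_save_area;
};

int main() {
  if (sizeof(void *) == 8) { // LP64 host
    assert(offsetof(VaListTag, overflow_arg_area) == 8);
    assert(offsetof(VaListTag, reg_save_area) == 16);
  }
  return 0;
}
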
@@ -15030,10 +16016,13 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
- assert((Subtarget->isTargetLinux() ||
- Subtarget->isTargetDarwin()) &&
- "Unhandled target in LowerVAARG");
assert(Op.getNode()->getNumOperands() == 4);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
+ // The Win64 ABI uses char* instead of a structure.
+ return DAG.expandVAArg(Op.getNode());
+
SDValue Chain = Op.getOperand(0);
SDValue SrcPtr = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -15061,8 +16050,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
assert(!Subtarget->useSoftFloat() &&
- !(DAG.getMachineFunction().getFunction()->hasFnAttribute(
- Attribute::NoImplicitFloat)) &&
+ !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1());
}
@@ -15091,8 +16079,14 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
+ // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
+ // where a va_list is still an i8*.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
+ if (Subtarget->isCallingConvWin64(
+ DAG.getMachineFunction().getFunction()->getCallingConv()))
+ // On Win64 a va_list is just a pointer, so expand va_copy generically.
+ return DAG.expandVACopy(Op.getNode());
+
SDValue Chain = Op.getOperand(0);
SDValue DstPtr = Op.getOperand(1);
SDValue SrcPtr = Op.getOperand(2);
@@ -15230,72 +16224,126 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
// The return type has to be a 128-bit type with the same element
// type as the input type.
MVT EltVT = VT.getVectorElementType();
- EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
+ MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits());
ShAmt = DAG.getBitcast(ShVT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
-/// \brief Return (and \p Op, \p Mask) for compare instructions or
-/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
-/// necessary casting for \p Mask when lowering masking intrinsics.
-static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
- SDValue PreservedSrc,
- const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(),
- MVT::i1, VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDLoc dl(Op);
+/// \brief Return Mask with the necessary casting or extending
+/// for \p Mask according to \p MaskVT when lowering masking intrinsics
+static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, SDLoc dl) {
- assert(MaskVT.isSimple() && "invalid mask type");
+ if (MaskVT.bitsGT(Mask.getSimpleValueType())) {
+ // Mask should be extended
+ Mask = DAG.getNode(ISD::ANY_EXTEND, dl,
+ MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask);
+ }
- if (isAllOnes(Mask))
- return Op;
+ if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) {
+ if (MaskVT == MVT::v64i1) {
+ assert(Subtarget->hasBWI() && "Expected AVX512BW target!");
+ // In 32-bit mode a bitcast of i64 is illegal; extend/split it instead.
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
+ DAG.getConstant(0, dl, MVT::i32));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
+ DAG.getConstant(1, dl, MVT::i32));
+
+ Lo = DAG.getBitcast(MVT::v32i1, Lo);
+ Hi = DAG.getBitcast(MVT::v32i1, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
+ } else {
+ // MaskVT requires < 64 bits. Truncate the mask (should succeed in any case),
+ // and bitcast.
+ MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits());
+ return DAG.getBitcast(MaskVT,
+ DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask));
+ }
+
+ } else {
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
+ }
+}
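
For the v64i1 case the helper splits the i64 mask into two i32 halves, turns each into a v32i1, and concatenates them, since a direct i64 bitcast is illegal in 32-bit mode. The transformation only needs the split to be lossless, which a scalar sketch can demonstrate (illustrative code, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t mask = 0xF0F0F0F0A5A5A5A5ULL;
  uint32_t lo = static_cast<uint32_t>(mask);       // EXTRACT_ELEMENT index 0
  uint32_t hi = static_cast<uint32_t>(mask >> 32); // EXTRACT_ELEMENT index 1
  uint64_t rejoined = (static_cast<uint64_t>(hi) << 32) | lo; // CONCAT_VECTORS
  assert(rejoined == mask); // no mask bits are lost by the split
  return 0;
}
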
- switch (Op.getOpcode()) {
- default: break;
- case X86ISD::PCMPEQM:
- case X86ISD::PCMPGTM:
- case X86ISD::CMPM:
- case X86ISD::CMPMU:
- return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
- }
- if (PreservedSrc.getOpcode() == ISD::UNDEF)
- PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc);
+/// \brief Return (and \p Op, \p Mask) for compare instructions or
+/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
+/// necessary casting or extending for \p Mask when lowering masking intrinsics
+static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
+ SDValue PreservedSrc,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ unsigned OpcodeSelect = ISD::VSELECT;
+ SDLoc dl(Op);
+
+ if (isAllOnesConstant(Mask))
+ return Op;
+
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ switch (Op.getOpcode()) {
+ default: break;
+ case X86ISD::PCMPEQM:
+ case X86ISD::PCMPGTM:
+ case X86ISD::CMPM:
+ case X86ISD::CMPMU:
+ return DAG.getNode(ISD::AND, dl, VT, Op, VMask);
+ case X86ISD::VFPCLASS:
+ case X86ISD::VFPCLASSS:
+ return DAG.getNode(ISD::OR, dl, VT, Op, VMask);
+ case X86ISD::VTRUNC:
+ case X86ISD::VTRUNCS:
+ case X86ISD::VTRUNCUS:
+ // We can't use ISD::VSELECT here because it is not always "Legal"
+ // for the destination type. For example, vpmovqb requires only AVX512,
+ // while a vselect that operates on byte elements requires BWI.
+ OpcodeSelect = X86ISD::SELECT;
+ break;
+ }
+ if (PreservedSrc.getOpcode() == ISD::UNDEF)
+ PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
}
/// \brief Creates an SDNode for a predicated scalar operation.
/// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc).
-/// The mask is comming as MVT::i8 and it should be truncated
+/// The mask is coming as MVT::i8 and it should be truncated
/// to MVT::i1 while lowering masking intrinsics.
/// The main difference between ScalarMaskingNode and VectorMaskingNode is using
-/// "X86select" instead of "vselect". We just can't create the "vselect" node for
-/// a scalar instruction.
+/// "X86select" instead of "vselect". We just can't create the "vselect" node
+/// for a scalar instruction.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
SDValue PreservedSrc,
const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- if (isAllOnes(Mask))
- return Op;
+ if (isAllOnesConstant(Mask))
+ return Op;
- EVT VT = Op.getValueType();
- SDLoc dl(Op);
- // The mask should be of type MVT::i1
- SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
+ MVT VT = Op.getSimpleValueType();
+ SDLoc dl(Op);
+ // The mask should be of type MVT::i1
+ SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
- if (PreservedSrc.getOpcode() == ISD::UNDEF)
- PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
+ if (Op.getOpcode() == X86ISD::FSETCC)
+ return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
+ if (Op.getOpcode() == X86ISD::VFPCLASS ||
+ Op.getOpcode() == X86ISD::VFPCLASSS)
+ return DAG.getNode(ISD::OR, dl, VT, Op, IMask);
+
+ if (PreservedSrc.getOpcode() == ISD::UNDEF)
+ PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
}
static int getSEHRegistrationNodeSize(const Function *Fn) {
@@ -15309,15 +16357,16 @@ static int getSEHRegistrationNodeSize(const Function *Fn) {
case EHPersonality::MSVC_CXX: return 16;
default: break;
}
- report_fatal_error("can only recover FP for MSVC EH personality functions");
+ report_fatal_error(
+ "can only recover FP for 32-bit MSVC EH personality functions");
}
-/// When the 32-bit MSVC runtime transfers control to us, either to an outlined
+/// When the MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception, we
/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
/// Here's the math:
/// RegNodeBase = EntryEBP - RegNodeSize
-/// ParentFP = RegNodeBase - RegNodeFrameOffset
+/// ParentFP = RegNodeBase - ParentFrameOffset
/// Subtracting RegNodeSize takes us to the offset of the registration node, and
/// subtracting the offset (negative on x86) takes us back to the parent FP.
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
@@ -15334,29 +16383,35 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
if (!Fn->hasPersonalityFn())
return EntryEBP;
- int RegNodeSize = getSEHRegistrationNodeSize(Fn);
-
// Get an MCSymbol that will ultimately resolve to the frame offset of the EH
- // registration.
+ // registration, or the .set_setframe offset.
MCSymbol *OffsetSym =
MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
GlobalValue::getRealLinkageName(Fn->getName()));
SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
- SDValue RegNodeFrameOffset =
+ SDValue ParentFrameOffset =
DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
+ // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after
+ // the prologue to RBP in the parent function.
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+ if (Subtarget.is64Bit())
+ return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);
+
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn);
// RegNodeBase = EntryEBP - RegNodeSize
- // ParentFP = RegNodeBase - RegNodeFrameOffset
+ // ParentFP = RegNodeBase - ParentFrameOffset
SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
DAG.getConstant(RegNodeSize, dl, PtrVT));
- return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset);
+ return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
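
The recovery now has two shapes: on x64 the parent frame pointer is EntryRSP plus the recorded offset, while on x86 it remains EntryEBP minus the registration-node size minus the (negative) parent frame offset. A standalone sketch of the 32-bit arithmetic with made-up numbers (RegNodeSize and the offset below are hypothetical values):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t entryEBP = 0x00ffff40u;
  int32_t regNodeSize = 24;        // hypothetical registration node size
  int32_t parentFrameOffset = -16; // negative on x86, as noted above
  uint32_t regNodeBase = entryEBP - static_cast<uint32_t>(regNodeSize);
  uint32_t parentFP = regNodeBase - static_cast<uint32_t>(parentFrameOffset);
  assert(parentFP == entryEBP - 24 + 16); // subtracting a negative offset
  return 0;
}
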
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
if (IntrData) {
switch(IntrData->Type) {
@@ -15365,6 +16420,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case INTR_TYPE_2OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
+ case INTR_TYPE_2OP_IMM8:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2)));
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
@@ -15376,28 +16434,53 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode;
+ // We always add the rounding mode to the node.
+ // If the rounding mode is not specified, we add the
+ // "current direction" mode.
if (Op.getNumOperands() == 4)
- RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ RoundingMode =
+ DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
else
RoundingMode = Op.getOperand(4);
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0) {
- unsigned Round = cast<ConstantSDNode>(RoundingMode)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (IntrWithRoundingModeOpcode != 0)
+ if (cast<ConstantSDNode>(RoundingMode)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(), Src, RoundingMode),
Mask, PassThru, Subtarget, DAG);
- }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_1OP_MASK: {
SDValue Src = Op.getOperand(1);
- SDValue Passthru = Op.getOperand(2);
+ SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
+ // We add rounding mode to the Node when
+ // - RM Opcode is specified and
+ // - RM is not "current direction".
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(4);
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
+ if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ }
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src),
- Mask, Passthru, Subtarget, DAG);
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_SCALAR_MASK: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue passThru = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
+ Mask, passThru, Subtarget, DAG);
}
case INTR_TYPE_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
@@ -15405,7 +16488,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Src0 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
// There are 2 kinds of intrinsics in this group:
- // (1) With supress-all-exceptions (sae) or rounding mode- 6 operands
+ // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands
// (2) With rounding mode and sae - 7 operands.
if (Op.getNumOperands() == 6) {
SDValue Sae = Op.getOperand(5);
@@ -15421,11 +16504,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
RoundingMode, Sae),
Mask, Src0, Subtarget, DAG);
}
- case INTR_TYPE_2OP_MASK: {
+ case INTR_TYPE_2OP_MASK:
+ case INTR_TYPE_2OP_IMM8_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+
+ if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK)
+ Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2);
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15440,8 +16528,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask, PassThru, Subtarget, DAG);
}
}
- return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1,Src2),
+ // TODO: Intrinsics should have fast-math-flags to propagate.
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_2OP_MASK_RM: {
@@ -15449,7 +16537,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Src2 = Op.getOperand(2);
SDValue PassThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- // We specify 2 possible modes for intrinsics, with/without rounding modes.
+ // We specify 2 possible modes for intrinsics, with/without rounding
+ // modes.
 // First, we check if the intrinsic has a rounding mode (6 operands),
// if not, we set rounding mode to "current".
SDValue Rnd;
@@ -15461,12 +16550,56 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Rnd),
Mask, PassThru, Subtarget, DAG);
}
- case INTR_TYPE_3OP_MASK: {
+ case INTR_TYPE_3OP_SCALAR_MASK_RM: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue PassThru = Op.getOperand(4);
SDValue Mask = Op.getOperand(5);
+ SDValue Sae = Op.getOperand(6);
+
+ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1,
+ Src2, Src3, Sae),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_3OP_MASK_RM: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Imm = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ // We specify 2 possible modes for intrinsics, with/without rounding
+ // modes.
+ // First, we check if the intrinsic has a rounding mode (7 operands);
+ // if not, we set the rounding mode to "current".
+ SDValue Rnd;
+ if (Op.getNumOperands() == 7)
+ Rnd = Op.getOperand(6);
+ else
+ Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Imm, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case INTR_TYPE_3OP_IMM8_MASK:
+ case INTR_TYPE_3OP_MASK:
+ case INSERT_SUBVEC: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue PassThru = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+
+ if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
+ Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
+ else if (IntrData->Type == INSERT_SUBVEC) {
+ // imm should be adapted to ISD::INSERT_SUBVECTOR behavior
+ assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
+ unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
+ Imm *= Src2.getSimpleValueType().getVectorNumElements();
+ Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
+ }
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15486,7 +16619,27 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Mask, PassThru, Subtarget, DAG);
}
case VPERM_3OP_MASKZ:
- case VPERM_3OP_MASK:
+ case VPERM_3OP_MASK:{
+ // Src2 is the PassThru
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ MVT VT = Op.getSimpleValueType();
+ SDValue PassThru = SDValue();
+
+ // set PassThru element
+ if (IntrData->Type == VPERM_3OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+ else
+ PassThru = DAG.getBitcast(VT, Src2);
+
+ // Swap Src1 and Src2 in the node creation
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
+ dl, Op.getValueType(),
+ Src2, Src1, Src3),
+ Mask, PassThru, Subtarget, DAG);
+ }
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
case FMA_OP_MASK: {
@@ -15494,11 +16647,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// set PassThru element
- if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ)
+ if (IntrData->Type == FMA_OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else if (IntrData->Type == FMA_OP_MASK3)
PassThru = Src3;
@@ -15523,6 +16676,50 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
+ case TERLOG_OP_MASK:
+ case TERLOG_OP_MASKZ: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4));
+ SDValue Mask = Op.getOperand(5);
+ MVT VT = Op.getSimpleValueType();
+ SDValue PassThru = Src1;
+ // Set PassThru element.
+ if (IntrData->Type == TERLOG_OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ Src1, Src2, Src3, Src4),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ case FPCLASS: {
+ // FPclass intrinsics with mask
+ SDValue Src1 = Op.getOperand(1);
+ MVT VT = Src1.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue Imm = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
+ SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm);
+ SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask,
+ DAG.getTargetConstant(0, dl, MaskVT),
+ Subtarget, DAG);
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
+ DAG.getUNDEF(BitcastVT), FPclassMask,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getBitcast(Op.getValueType(), Res);
+ }
+ case FPCLASSS: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Imm = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm);
+ SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask,
+ DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
+ return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask);
+ }
case CMP_MASK:
case CMP_MASK_CC: {
// Comparison intrinsics with masks.
@@ -15534,12 +16731,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
// (v2i1 (and (PCMPEQM %a, %b),
// (extract_subvector
// (v8i1 (bitcast %mask)), 0))), 0))))
- EVT VT = Op.getOperand(1).getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
+ MVT VT = Op.getOperand(1).getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3);
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
SDValue Cmp;
if (IntrData->Type == CMP_MASK_CC) {
SDValue CC = Op.getOperand(3);
@@ -15573,6 +16769,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
DAG.getIntPtrConstant(0, dl));
return DAG.getBitcast(Op.getValueType(), Res);
}
+ case CMP_MASK_SCALAR_CC: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
+ SDValue Mask = Op.getOperand(4);
+
+ SDValue Cmp;
+ if (IntrData->Opc1 != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd);
+ }
+ // Default rounding mode.
+ if (!Cmp.getNode())
+ Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC);
+
+ SDValue CmpMask = getScalarMaskingNode(Cmp, Mask,
+ DAG.getTargetConstant(0, dl,
+ MVT::i1),
+ Subtarget, DAG);
+
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i8,
+ DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, CmpMask),
+ DAG.getValueType(MVT::i1));
+ }
case COMI: { // Comparison intrinsics
ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1;
SDValue LHS = Op.getOperand(1);
@@ -15584,6 +16806,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
DAG.getConstant(X86CC, dl, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ case COMI_RM: { // Comparison intrinsics with Sae
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ SDValue CC = Op.getOperand(3);
+ SDValue Sae = Op.getOperand(4);
+ auto ComiType = TranslateX86ConstCondToX86CC(CC);
+ // choose between ordered and unordered (comi/ucomi)
+ unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1;
+ SDValue Cond;
+ if (cast<ConstantSDNode>(Sae)->getZExtValue() !=
+ X86::STATIC_ROUNDING::CUR_DIRECTION)
+ Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae);
+ else
+ Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS);
+ SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
+ }
case VSHIFT:
return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(),
Op.getOperand(1), Op.getOperand(2), DAG);
@@ -15598,27 +16838,75 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Mask = Op.getOperand(3);
SDValue DataToCompress = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
- if (isAllOnes(Mask)) // return data as is
+ if (isAllOnesConstant(Mask)) // return data as is
return Op.getOperand(1);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
DataToCompress),
Mask, PassThru, Subtarget, DAG);
}
+ case BROADCASTM: {
+ SDValue Mask = Op.getOperand(1);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
+ Mask = DAG.getBitcast(MaskVT, Mask);
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
+ }
case BLEND: {
SDValue Mask = Op.getOperand(3);
- EVT VT = Op.getValueType();
- EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- VT.getVectorNumElements());
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
- SDLoc dl(Op);
- SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
- DAG.getBitcast(BitcastVT, Mask),
- DAG.getIntPtrConstant(0, dl));
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
}
+ case KUNPCK: {
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2);
+
+ SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
+ SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
+ // Arguments should be swapped.
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl,
+ MVT::getVectorVT(MVT::i1, VT.getSizeInBits()),
+ Src2, Src1);
+ return DAG.getBitcast(VT, Res);
+ }
+ case CONVERT_TO_MASK: {
+ MVT SrcVT = Op.getOperand(1).getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
+
+ SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT,
+ Op.getOperand(1));
+ SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
+ DAG.getUNDEF(BitcastVT), CvtMask,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getBitcast(Op.getValueType(), Res);
+ }
+ case CONVERT_MASK_TO_VEC: {
+ SDValue Mask = Op.getOperand(1);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getNode(IntrData->Opc0, dl, VT, VMask);
+ }
+ case BRCST_SUBVEC_TO_VEC: {
+ SDValue Src = Op.getOperand(1);
+ SDValue Passthru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ EVT resVT = Passthru.getValueType();
+ SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT,
+ DAG.getUNDEF(resVT), Src,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue immVal;
+ if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector())
+ immVal = DAG.getConstant(0x44, dl, MVT::i8);
+ else
+ immVal = DAG.getConstant(0, dl, MVT::i8);
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
+ subVec, subVec, immVal),
+ Mask, Passthru, Subtarget, DAG);
+ }
default:
break;
}
@@ -15832,23 +17120,17 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget * Subtarget) {
SDLoc dl(Op);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- if (!C)
- llvm_unreachable("Invalid scale type");
- unsigned ScaleVal = C->getZExtValue();
- if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
- llvm_unreachable("Valid scale values are 1, 2, 4, 8");
-
+ auto *C = cast<ConstantSDNode>(ScaleOp);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else {
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
@@ -15860,7 +17142,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
if (Src.getOpcode() == ISD::UNDEF)
- Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
+ Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
@@ -15871,25 +17153,19 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- if (!C)
- llvm_unreachable("Invalid scale type");
- unsigned ScaleVal = C->getZExtValue();
- if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8)
- llvm_unreachable("Valid scale values are 1, 2, 4, 8");
-
+ auto *C = cast<ConstantSDNode>(ScaleOp);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
+ MVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
if (MaskC)
MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT);
else {
- EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
- Mask.getValueType().getSizeInBits());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
// In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
// are extracted by EXTRACT_SUBVECTOR.
@@ -15907,12 +17183,11 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Mask, SDValue Base, SDValue Index,
SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- assert(C && "Invalid scale type");
+ auto *C = cast<ConstantSDNode>(ScaleOp);
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- EVT MaskVT =
+ MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue MaskInReg;
ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
@@ -16034,64 +17309,59 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, DL);
}
-static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
- const Function *Fn = MF.getFunction();
- SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
+ SDValue RegNode = Op.getOperand(2);
+ WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
+ if (!EHInfo)
+ report_fatal_error("EH registrations only live in functions using WinEH");
+
+ // Cast the operand to an alloca, and remember the frame index.
+ auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode);
+ if (!FINode)
+ report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca");
+ EHInfo->EHRegNodeFrameIndex = FINode->getIndex();
+
+ // Return the chain operand without making any DAG nodes.
+ return Chain;
+}
- assert(Subtarget->getFrameLowering()->hasFP(MF) &&
- "using llvm.x86.seh.restoreframe requires a frame pointer");
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT VT = TLI.getPointerTy(DAG.getDataLayout());
-
- const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- unsigned FrameReg =
- RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
- unsigned SPReg = RegInfo->getStackRegister();
- unsigned SlotSize = RegInfo->getSlotSize();
+/// \brief Lower intrinsics for the TRUNCATE_TO_MEM case;
+/// returns a truncating Store/MaskedStore node.
+static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue &Op,
+ SelectionDAG &DAG,
+ MVT ElementType) {
+ SDLoc dl(Op);
+ SDValue Mask = Op.getOperand(4);
+ SDValue DataToTruncate = Op.getOperand(3);
+ SDValue Addr = Op.getOperand(2);
+ SDValue Chain = Op.getOperand(0);
- // Get incoming EBP.
- SDValue IncomingEBP =
- DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+ MVT VT = DataToTruncate.getSimpleValueType();
+ MVT SVT = MVT::getVectorVT(ElementType, VT.getVectorNumElements());
- // SP is saved in the first field of every registration node, so load
- // [EBP-RegNodeSize] into SP.
- int RegNodeSize = getSEHRegistrationNodeSize(Fn);
- SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP,
- DAG.getConstant(-RegNodeSize, dl, VT));
- SDValue NewSP =
- DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false,
- false, VT.getScalarSizeInBits() / 8);
- Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
-
- if (!RegInfo->needsStackRealignment(MF)) {
- // Adjust EBP to point back to the original frame position.
- SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP);
- Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
- } else {
- assert(RegInfo->hasBasePointer(MF) &&
- "functions with Win32 EH must use frame or base pointer register");
+ if (isAllOnesConstant(Mask)) // return just a truncate store
+ return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr,
+ MachinePointerInfo(), SVT, false, false,
+ SVT.getScalarSizeInBits()/8);
- // Reload the base pointer (ESI) with the adjusted incoming EBP.
- SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP);
- Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ MVT BitcastVT = MVT::getVectorVT(MVT::i1,
+ Mask.getSimpleValueType().getSizeInBits());
+ // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
+ // are extracted by EXTRACT_SUBVECTOR.
+ SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
+ DAG.getBitcast(BitcastVT, Mask),
+ DAG.getIntPtrConstant(0, dl));
- // Reload the spilled EBP value, now that the stack and base pointers are
- // set up.
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- X86FI->setHasSEHFramePtrSave(true);
- int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize);
- X86FI->setSEHFramePtrSaveIndex(FI);
- SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT),
- MachinePointerInfo(), false, false, false,
- VT.getScalarSizeInBits() / 8);
- Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP);
- }
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore, SVT.getStoreSize(),
+ SVT.getScalarSizeInBits()/8);
- return Chain;
+ return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr,
+ VMask, SVT, MMO, true);
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
@@ -16100,16 +17370,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo);
if (!IntrData) {
- if (IntNo == llvm::Intrinsic::x86_seh_restoreframe)
- return LowerSEHRESTOREFRAME(Op, Subtarget, DAG);
+ if (IntNo == llvm::Intrinsic::x86_seh_ehregnode)
+ return MarkEHRegistrationNode(Op, DAG);
return SDValue();
}
SDLoc dl(Op);
switch(IntrData->Type) {
- default:
- llvm_unreachable("Unknown Intrinsic Type");
- break;
+ default: llvm_unreachable("Unknown Intrinsic Type");
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
@@ -16214,8 +17482,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
- EVT VT = DataToCompress.getValueType();
- if (isAllOnes(Mask)) // return just a store
+ MVT VT = DataToCompress.getSimpleValueType();
+ if (isAllOnesConstant(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
@@ -16227,15 +17495,21 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
MachinePointerInfo(), false, false,
VT.getScalarSizeInBits()/8);
}
+ case TRUNCATE_TO_MEM_VI8:
+ return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8);
+ case TRUNCATE_TO_MEM_VI16:
+ return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16);
+ case TRUNCATE_TO_MEM_VI32:
+ return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32);
case EXPAND_FROM_MEM: {
SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);
- EVT VT = Op.getValueType();
+ MVT VT = Op.getSimpleValueType();
- if (isAllOnes(Mask)) // return just a load
+ if (isAllOnesConstant(Mask)) // return just a load
return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false,
false, VT.getScalarSizeInBits()/8);
@@ -16359,6 +17633,21 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op));
}
+unsigned X86TargetLowering::getExceptionPointerRegister(
+ const Constant *PersonalityFn) const {
+ if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR)
+ return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+
+ return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX;
+}
+
+unsigned X86TargetLowering::getExceptionSelectorRegister(
+ const Constant *PersonalityFn) const {
+ // Funclet personalities don't use selectors (the runtime does the selection).
+ assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
+ return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX;
+}
+
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
@@ -16497,9 +17786,11 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.hasAttribute(Idx, Attribute::InReg))
+ if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
+ auto &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
- InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
+ InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
+ }
if (InRegCount > 2) {
report_fatal_error("Nest register in use - reduce number of inreg"
@@ -16588,8 +17879,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
- MachineMemOperand::MOStore, 2, 2);
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
+ MachineMemOperand::MOStore, 2, 2);
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
@@ -16623,12 +17914,75 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
-static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+/// \brief Lower a vector CTLZ using the natively supported vector CTLZ instruction.
+//
+// 1. i32/i64 128/256-bit vectors (native support requires VLX) are extended
+// to 512-bit vectors.
+// 2. i8/i16 vectors are implemented using a dword LZCNT vector instruction
+// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
+// split the vector, perform the operation on its Lo and Hi parts and
+// concatenate the results.
+static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- EVT OpVT = VT;
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (EltVT == MVT::i64 || EltVT == MVT::i32) {
+ // Extend to 512 bit vector.
+ assert((VT.is256BitVector() || VT.is128BitVector()) &&
+ "Unsupported value type for operation");
+
+ MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits());
+ SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
+ DAG.getUNDEF(NewVT),
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ "Unsupported element type");
+
+ if (16 < NumElems) {
+ // Split the vector; its Lo and Hi parts will be handled in the next iteration.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
+ MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ Lo = DAG.getNode(Op.getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(Op.getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ }
+
+ MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
+
+ assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
+ "Unsupported value type for operation");
+
+ // Use native supported vector instruction vplzcntd.
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
+ SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
+ SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
+ SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);
+
+ return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
+}
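
For i8/i16 elements the function widens to i32 lanes, counts leading zeros there, and subtracts the zeros introduced by the zero-extension. The same arithmetic for a single element, as an illustrative sketch (uses the GCC/Clang __builtin_clz builtin; ctlz16ViaDword is a hypothetical helper):

#include <cassert>
#include <cstdint>

// ctlz16(x) == ctlz32(zext32(x)) - (32 - 16), for x != 0, matching the
// zext -> lzcnt -> trunc -> sub sequence built above.
static unsigned ctlz16ViaDword(uint16_t x) {
  unsigned dwordLZ = static_cast<unsigned>(__builtin_clz(static_cast<uint32_t>(x)));
  return dwordLZ - (32 - 16);
}

int main() {
  assert(ctlz16ViaDword(0x0001) == 15);
  assert(ctlz16ViaDword(0x8000) == 0);
  assert(ctlz16ViaDword(0x00F0) == 8);
  return 0;
}
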
+
+static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ MVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
+ if (VT.isVector() && Subtarget->hasAVX512())
+ return LowerVectorCTLZ_AVX512(Op, DAG);
+
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is not an i8 bsr.
@@ -16658,7 +18012,8 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -16686,13 +18041,39 @@ static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- unsigned NumBits = VT.getSizeInBits();
+ unsigned NumBits = VT.getScalarSizeInBits();
SDLoc dl(Op);
- Op = Op.getOperand(0);
+
+ if (VT.isVector()) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ SDValue N0 = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+
+ // lsb(x) = (x & -x)
+ SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, N0));
+
+ // cttz_undef(x) = (width - 1) - ctlz(lsb)
+ if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
+ TLI.isOperationLegal(ISD::CTLZ, VT)) {
+ SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT);
+ return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne,
+ DAG.getNode(ISD::CTLZ, dl, VT, LSB));
+ }
+
+ // cttz(x) = ctpop(lsb - 1)
+ SDValue One = DAG.getConstant(1, dl, VT);
+ return DAG.getNode(ISD::CTPOP, dl, VT,
+ DAG.getNode(ISD::SUB, dl, VT, LSB, One));
+ }
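
The vector path leans on the two identities spelled out in the comments: isolate the lowest set bit with x & -x, then either count leading zeros of it (for the ZERO_UNDEF form) or popcount lsb - 1. They can be checked for small inputs with a throwaway sketch (GCC/Clang builtins, illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 1; x < 4096; ++x) {
    uint32_t lsb = x & (0u - x);                       // lowest set bit
    unsigned viaCtlz = 31u - static_cast<unsigned>(__builtin_clz(lsb));
    unsigned viaPopcnt = static_cast<unsigned>(__builtin_popcount(lsb - 1));
    unsigned expected = static_cast<unsigned>(__builtin_ctz(x));
    assert(viaCtlz == expected);   // cttz_undef(x) = (width - 1) - ctlz(lsb)
    assert(viaPopcnt == expected); // cttz(x)       = ctpop(lsb - 1)
  }
  return 0;
}
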
+
+ assert(Op.getOpcode() == ISD::CTTZ &&
+ "Only scalar CTTZ requires custom lowering");
// Issue a bsf (scan bits forward) which also sets EFLAGS.
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
+ Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0));
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
@@ -16753,6 +18134,13 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
return Lower256IntArith(Op, DAG);
}
+static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return Lower256IntArith(Op, DAG);
+}
+
static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -16885,7 +18273,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
SDValue AhiBlo = Ahi;
SDValue AloBhi = Bhi;
// Bit cast to 32-bit vectors for MULUDQ
- EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
+ MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
(VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
A = DAG.getBitcast(MulVT, A);
B = DAG.getBitcast(MulVT, B);
@@ -16962,7 +18350,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
- EVT VT = Op0.getValueType();
+ MVT VT = Op0.getSimpleValueType();
SDLoc dl(Op);
assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
@@ -17034,7 +18422,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Ops, dl);
}
-// Return true if the requred (according to Opcode) shift-imm form is natively
+// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
@@ -17054,14 +18442,14 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget,
}
// The shift amount is a variable, but it is the same for all vector lanes.
-// These instrcutions are defined together with shift-immediate.
+// These instructions are defined together with shift-immediate.
static
bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
-// Return true if the requred (according to Opcode) variable-shift form is
+// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget,
unsigned Opcode) {
@@ -17133,27 +18521,37 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// i64 SRA needs to be performed as partial shifts.
if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
- Op.getOpcode() == ISD::SRA)
+ Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP())
return ArithmeticShiftRight64(ShiftAmt);
- if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) {
+ if (VT == MVT::v16i8 ||
+ (Subtarget->hasInt256() && VT == MVT::v32i8) ||
+ VT == MVT::v64i8) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- if (Op.getOpcode() == ISD::SHL) {
- // Simple i8 add case
- if (ShiftAmt == 1)
- return DAG.getNode(ISD::ADD, dl, VT, R, R);
+ // Simple i8 add case
+ if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+ return DAG.getNode(ISD::ADD, dl, VT, R, R);
+
+ // ashr(R, 7) === cmp_slt(R, 0)
+ if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // XOP can shift v16i8 directly instead of as shift v8i16 + mask.
+ if (VT == MVT::v16i8 && Subtarget->hasXOP())
+ return SDValue();
+ if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT,
R, ShiftAmt, DAG);
SHL = DAG.getBitcast(VT, SHL);
// Zero out the rightmost bits.
- SmallVector<SDValue, 32> V(
- NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
+ DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
@@ -17161,24 +18559,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
R, ShiftAmt, DAG);
SRL = DAG.getBitcast(VT, SRL);
// Zero out the leftmost bits.
- SmallVector<SDValue, 32> V(
- NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT));
}
if (Op.getOpcode() == ISD::SRA) {
- if (ShiftAmt == 7) {
- // R s>> 7 === R s< 0
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
- }
-
- // R s>> a === ((R u>> a) ^ m) - m
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask)
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- SmallVector<SDValue, 32> V(NumElts,
- DAG.getConstant(128 >> ShiftAmt, dl,
- MVT::i8));
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
+
+ SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
@@ -17189,35 +18577,51 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
}
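// A scalar sketch of the xor/sub sign-fixup used above for the i8 lanes,
// assuming two's-complement int8_t (illustration only):
#include <cstdint>

static int8_t ashr8(int8_t R, unsigned Amt) {
  uint8_t Mask = uint8_t(0x80u >> Amt);        // sign bit after the shift
  uint8_t Res = uint8_t(uint8_t(R) >> Amt);    // logical shift of the lane
  return int8_t(uint8_t((Res ^ Mask) - Mask)); // xor/sub re-extends the sign
}
// e.g. ashr8(int8_t(-128), 2) == -32 and ashr8(80, 2) == 20.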
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
- if (!Subtarget->is64Bit() &&
- (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
- Amt.getOpcode() == ISD::BITCAST &&
- Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ if (!Subtarget->is64Bit() && !Subtarget->hasXOP() &&
+ (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) {
+
+ // Peek through any splat that was introduced for i64 shift vectorization.
+ int SplatIndex = -1;
+ if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
+ if (SVN->isSplat()) {
+ SplatIndex = SVN->getSplatIndex();
+ Amt = Amt.getOperand(0);
+ assert(SplatIndex < (int)VT.getVectorNumElements() &&
+ "Splat shuffle referencing second operand");
+ }
+
+ if (Amt.getOpcode() != ISD::BITCAST ||
+ Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
Amt = Amt.getOperand(0);
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
VT.getVectorNumElements();
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
uint64_t ShiftAmt = 0;
+ unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
for (unsigned i = 0; i != Ratio; ++i) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + BaseOp));
if (!C)
return SDValue();
// 6 == Log2(64)
ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
}
- // Check remaining shift amounts.
- for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
- uint64_t ShAmt = 0;
- for (unsigned j = 0; j != Ratio; ++j) {
- ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
- if (!C)
+
+ // Check remaining shift amounts (if not a splat).
+ if (SplatIndex < 0) {
+ for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) {
+ uint64_t ShAmt = 0;
+ for (unsigned j = 0; j != Ratio; ++j) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
+ if (!C)
+ return SDValue();
+ // 6 == Log2(64)
+ ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
+ }
+ if (ShAmt != ShiftAmt)
return SDValue();
- // 6 == Log2(64)
- ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
}
- if (ShAmt != ShiftAmt)
- return SDValue();
}
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
@@ -17245,7 +18649,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) {
SDValue BaseShAmt;
- EVT EltVT = VT.getVectorElementType();
+ MVT EltVT = VT.getVectorElementType();
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Amt)) {
// Check if this build_vector node is doing a splat.
@@ -17262,7 +18666,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
unsigned SplatIdx = (unsigned)SVN->getSplatIndex();
SDValue InVec = Amt.getOperand(0);
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
- assert((SplatIdx < InVec.getValueType().getVectorNumElements()) &&
+ assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) &&
"Unexpected shuffle index found!");
BaseShAmt = InVec.getOperand(SplatIdx);
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
@@ -17327,11 +18731,26 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return V;
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
- return V;
+ return V;
if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode()))
return Op;
+ // XOP has 128-bit variable logical/arithmetic shifts.
+ // +ve/-ve Amt = shift left/right.
+ if (Subtarget->hasXOP() &&
+ (VT == MVT::v2i64 || VT == MVT::v4i32 ||
+ VT == MVT::v8i16 || VT == MVT::v16i8)) {
+ if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) {
+ SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
+ Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt);
+ }
+ if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt);
+ if (Op.getOpcode() == ISD::SRA)
+ return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt);
+ }
+
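// A scalar sketch of the signed-amount convention assumed above: a positive
// amount shifts left, a negative amount shifts right (logical for VPSHL,
// arithmetic for VPSHA). Valid here for |Amt| < 32; names are illustrative.
#include <cstdint>

static uint32_t xopShiftLane(uint32_t X, int Amt) {
  return Amt >= 0 ? X << Amt : X >> -Amt;
}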
// 2i64 vector logical shifts can efficiently avoid scalarization - do the
// shifts per-lane and then shuffle the partial results back together.
if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) {
@@ -17343,6 +18762,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
+ // i64 vector arithmetic shift can be emulated with the transform:
+ // M = lshr(SIGN_BIT, Amt)
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
+ if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) &&
+ Op.getOpcode() == ISD::SRA) {
+ SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT);
+ SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
+ R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ R = DAG.getNode(ISD::XOR, dl, VT, R, M);
+ R = DAG.getNode(ISD::SUB, dl, VT, R, M);
+ return R;
+ }
+
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
@@ -17351,9 +18783,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
(Subtarget->hasInt256() && VT == MVT::v16i16)) &&
ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
SmallVector<SDValue, 8> Elts;
- EVT SVT = VT.getScalarType();
+ MVT SVT = VT.getVectorElementType();
unsigned SVTBits = SVT.getSizeInBits();
- const APInt &One = APInt(SVTBits, 1);
+ APInt One(SVTBits, 1);
unsigned NumElems = VT.getVectorNumElements();
for (unsigned i=0; i !=NumElems; ++i) {
@@ -17364,7 +18796,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
ConstantSDNode *ND = cast<ConstantSDNode>(Op);
- const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+ APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
uint64_t ShAmt = C.getZExtValue();
if (ShAmt >= SVTBits) {
Elts.push_back(DAG.getUNDEF(SVT));
@@ -17443,7 +18875,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Replace this node with two shifts followed by a MOVSS/MOVSD.
- EVT CastVT = MVT::v4i32;
+ MVT CastVT = MVT::v4i32;
SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
@@ -17507,7 +18939,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
}
- if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
+ if (VT == MVT::v16i8 ||
+ (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
@@ -17627,7 +19060,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
}
- if (Subtarget->hasInt256() && VT == MVT::v16i16) {
+ if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) {
MVT ExtVT = MVT::v8i32;
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
@@ -17710,7 +19143,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
if (VT.is256BitVector()) {
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType();
- EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
// Extract the two vectors
SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
@@ -17743,6 +19176,40 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return SDValue();
}
+static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+
+ assert(VT.isVector() && "Custom lowering only for vector rotates!");
+ assert(Subtarget->hasXOP() && "XOP support required for vector rotates!");
+ assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
+
+ // XOP has 128-bit vector variable + immediate rotates.
+ // +ve/-ve Amt = rotate left/right.
+
+ // Split 256-bit integers.
+ if (VT.is256BitVector())
+ return Lower256IntArith(Op, DAG);
+
+ assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
+
+ // Attempt to rotate by immediate.
+ if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
+ if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
+ uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
+ assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range");
+ return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
+ DAG.getConstant(RotateAmt, DL, MVT::i8));
+ }
+ }
+
+ // Use general rotate by variable (per-element).
+ return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt);
+}
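// A scalar sketch of the per-lane rotate that VPROT/VPROTI express, assuming
// 32-bit lanes (illustration only):
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned Amt) {
  Amt &= 31;                                        // rotation is modulo width
  return Amt ? (X << Amt) | (X >> (32 - Amt)) : X;  // rotl32(0x80000001, 1) == 3
}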
+
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
@@ -17759,8 +19226,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
case ISD::SADDO:
     // An add of one will be selected as an INC. Note that INC doesn't
     // set CF, so we can't do this for UADDO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
@@ -17775,8 +19241,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
- if (C->isOne()) {
+ if (isOneConstant(RHS)) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
@@ -17827,7 +19292,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
/// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
/// Used to know whether to use cmpxchg8/16b when expanding atomic operations
/// (otherwise we leave them alone to become __sync_fetch_and_... calls).
-bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
+bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
unsigned OpWidth = MemType->getPrimitiveSizeInBits();
if (OpWidth == 64)
@@ -17844,21 +19309,23 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
// Note: this turns large loads into lock cmpxchg8b/16b.
// FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
-bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
- return needsCmpXchgNb(PTy->getElementType());
+ return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
}
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
- const Type *MemType = AI->getType();
+ Type *MemType = AI->getType();
// If the operand is too big, we must see if cmpxchg8/16b is available
// and default to library calls otherwise.
if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
- return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
- : AtomicRMWExpansionKind::None;
+ return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
}
AtomicRMWInst::BinOp Op = AI->getOperation();
@@ -17869,14 +19336,14 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::Add:
case AtomicRMWInst::Sub:
// It's better to use xadd, xsub or xchg for these in all cases.
- return AtomicRMWExpansionKind::None;
+ return AtomicExpansionKind::None;
case AtomicRMWInst::Or:
case AtomicRMWInst::And:
case AtomicRMWInst::Xor:
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
- return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
- : AtomicRMWExpansionKind::None;
+ return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
+ : AtomicExpansionKind::None;
case AtomicRMWInst::Nand:
case AtomicRMWInst::Max:
case AtomicRMWInst::Min:
@@ -17884,7 +19351,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
case AtomicRMWInst::UMin:
// These always require a non-trivial set of data operations on x86. We must
// use a cmpxchg loop.
- return AtomicRMWExpansionKind::CmpXChg;
+ return AtomicExpansionKind::CmpXChg;
}
}
@@ -17898,7 +19365,7 @@ static bool hasMFENCE(const X86Subtarget& Subtarget) {
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
- const Type *MemType = AI->getType();
+ Type *MemType = AI->getType();
// Accesses larger than the native width are turned into cmpxchg/libcalls, so
// there is no benefit in turning such RMWs into loads, and it is actually
// harmful as it introduces a mfence.
@@ -17926,7 +19393,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// lowered to just a load without a fence. A mfence flushes the store buffer,
// making the optimization clearly correct.
// FIXME: it is required if isAtLeastRelease(Order) but it is not clear
- // otherwise, we might be able to be more agressive on relaxed idempotent
+ // otherwise, we might be able to be more aggressive on relaxed idempotent
// rmw. In practice, they do not look useful, so we don't try to be
// especially clever.
if (SynchScope == SingleThread)
@@ -18043,7 +19510,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
SDValue InVec = Op->getOperand(0);
SDLoc dl(Op);
unsigned NumElts = SrcVT.getVectorNumElements();
- EVT SVT = SrcVT.getVectorElementType();
+ MVT SVT = SrcVT.getVectorElementType();
// Widen the vector in input in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
@@ -18103,7 +19570,8 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// chunks, thus directly computes the pop count for v2i64 and v4i64.
if (EltVT == MVT::i64) {
SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
- V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros);
+ MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+ V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
return DAG.getBitcast(VT, V);
}
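// A plain C++ sketch of the horizontal byte-sum idea: psadbw against zero
// adds up the byte values of each 64-bit chunk, so summing per-byte popcounts
// reproduces the 64-bit population count (assumes the GCC/Clang builtins):
#include <cstdint>

static unsigned popcount64ByBytes(uint64_t V) {
  unsigned Sum = 0;
  for (int i = 0; i != 8; ++i)
    Sum += __builtin_popcount(unsigned((V >> (8 * i)) & 0xFF));
  return Sum; // equals __builtin_popcountll(V)
}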
@@ -18119,9 +19587,10 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// Do the horizontal sums into two v2i64s.
Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
- Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+ Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, Low), Zeros);
- High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+ High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
DAG.getBitcast(ByteVecVT, High), Zeros);
// Merge them together.
@@ -18311,7 +19780,7 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- assert(Op.getValueType().isVector() &&
+ assert(Op.getSimpleValueType().isVector() &&
"We only do custom lowering for vector population count.");
return LowerVectorCTPOP(Op, Subtarget, DAG);
}
@@ -18357,7 +19826,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getNode()->getSimpleValueType(0);
+ MVT VT = Op.getNode()->getSimpleValueType(0);
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -18435,31 +19904,203 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
+/// Widen a vector input to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
+ bool FillWithZeroes = false) {
+ // Check if InOp already has the right width.
+ MVT InVT = InOp.getSimpleValueType();
+ if (InVT == NVT)
+ return InOp;
+
+ if (InOp.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+ "input and widen element type must match");
+
+ unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = NVT.getVectorNumElements();
+ assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
+ "Unexpected request for vector widening");
+
+ EVT EltVT = NVT.getVectorElementType();
+
+ SDLoc dl(InOp);
+ if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
+ InOp.getNumOperands() == 2) {
+ SDValue N1 = InOp.getOperand(1);
+ if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
+ N1.isUndef()) {
+ InOp = InOp.getOperand(0);
+ InVT = InOp.getSimpleValueType();
+ InNumElts = InVT.getVectorNumElements();
+ }
+ }
+ if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0; i < InNumElts; ++i)
+ Ops.push_back(InOp.getOperand(i));
+
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+ DAG.getUNDEF(EltVT);
+ for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
+ Ops.push_back(FillVal);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+ }
+ SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
+ DAG.getUNDEF(NVT);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal,
+ InOp, DAG.getIntPtrConstant(0, dl));
+}
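// A container-level sketch of the widening ExtendToType performs in the
// constant build_vector case; std::vector<int> stands in for the DAG value
// and -1 for undef (purely illustrative assumptions):
#include <vector>

static std::vector<int> widenVector(std::vector<int> In, unsigned WidenNumElts,
                                    bool FillWithZeroes) {
  while (In.size() < WidenNumElts)
    In.push_back(FillWithZeroes ? 0 : /*undef placeholder*/ -1);
  return In;
}
// e.g. widenVector({1, 2}, 4, /*FillWithZeroes=*/true) yields {1, 2, 0, 0}.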
+
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
assert(Subtarget->hasAVX512() &&
"MGATHER/MSCATTER are supported on AVX-512 arch only");
+  // X86 scatter kills the mask register, so its type should be added to
+  // the list of return values.
+ // If the "scatter" has 2 return values, it is already handled.
+ if (Op.getNode()->getNumValues() == 2)
+ return Op;
+
MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
- EVT VT = N->getValue().getValueType();
+ SDValue Src = N->getValue();
+ MVT VT = Src.getSimpleValueType();
assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
SDLoc dl(Op);
- // X86 scatter kills mask register, so its type should be added to
- // the list of return values
- if (N->getNumValues() == 1) {
- SDValue Index = N->getIndex();
- if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
- !Index.getValueType().is512BitVector())
+ SDValue NewScatter;
+ SDValue Index = N->getIndex();
+ SDValue Mask = N->getMask();
+ SDValue Chain = N->getChain();
+ SDValue BasePtr = N->getBasePtr();
+ MVT MemVT = N->getMemoryVT().getSimpleVT();
+ MVT IndexVT = Index.getSimpleValueType();
+ MVT MaskVT = Mask.getSimpleValueType();
+
+ if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) {
+ // The v2i32 value was promoted to v2i64.
+ // Now we "redo" the type legalizer's work and widen the original
+ // v2i32 value to v4i32. The original v2i32 is retrieved from v2i64
+ // with a shuffle.
+ assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) &&
+ "Unexpected memory type");
+ int ShuffleMask[] = {0, 2, -1, -1};
+ Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src),
+ DAG.getUNDEF(MVT::v4i32), ShuffleMask);
+ // Now we have 4 elements instead of 2.
+ // Expand the index.
+ MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4);
+ Index = ExtendToType(Index, NewIndexVT, DAG);
+
+ // Expand the mask with zeroes
+ // Mask may be <2 x i64> or <2 x i1> at this moment
+ assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) &&
+ "Unexpected mask type");
+ MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4);
+ Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+ VT = MVT::v4i32;
+ }
+
+ unsigned NumElts = VT.getVectorNumElements();
+ if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+ !Index.getSimpleValueType().is512BitVector()) {
+    // AVX512F supports only 512-bit vectors. Either the data or the index
+    // must be 512 bits wide. If both the index and the data are 256-bit but
+    // the vector contains 8 elements, just sign-extend the index.
+ if (IndexVT == MVT::v8i32)
+ // Just extend index
Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+ else {
+ // The minimal number of elts in scatter is 8
+ NumElts = 8;
+ // Index
+ MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+ // Use original index here, do not modify the index twice
+ Index = ExtendToType(N->getIndex(), NewIndexVT, DAG);
+ if (IndexVT.getScalarType() == MVT::i32)
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+
+ // Mask
+      // At this point we have a promoted mask operand
+ assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+ MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+ // Use the original mask here, do not modify the mask twice
+ Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
+
+ // The value that should be stored
+ MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+ Src = ExtendToType(Src, NewVT, DAG);
+ }
+ }
+  // If the mask is "wide" at this point, truncate it to an i1 vector
+ MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
+
+ // The mask is killed by scatter, add it to the values
+ SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
+ SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
+ NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+ DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
+ return SDValue(NewScatter.getNode(), 0);
+}
+
+static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
- SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other);
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), Index };
+ MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
+ MVT VT = Op.getSimpleValueType();
+ SDValue Mask = N->getMask();
+ SDLoc dl(Op);
- SDValue NewScatter = DAG.getMaskedScatter(VTs, VT, dl, Ops, N->getMemOperand());
- DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
- return SDValue(NewScatter.getNode(), 0);
+ if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+ !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+    // This operation is legal for targets with VLX, but without
+    // VLX the vector should be widened to 512 bits.
+ unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+ MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+ MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+ SDValue Src0 = N->getSrc0();
+ Src0 = ExtendToType(Src0, WideDataVT, DAG);
+ Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+ SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
+ N->getBasePtr(), Mask, Src0,
+ N->getMemoryVT(), N->getMemOperand(),
+ N->getExtensionType());
+
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                NewLoad.getValue(0),
+                                DAG.getIntPtrConstant(0, dl));
+    SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
+ return DAG.getMergeValues(RetOps, dl);
+ }
+ return Op;
+}
+
+static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
+ SDValue DataToStore = N->getValue();
+ MVT VT = DataToStore.getSimpleValueType();
+ SDValue Mask = N->getMask();
+ SDLoc dl(Op);
+
+ if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+ !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+    // This operation is legal for targets with VLX, but without
+    // VLX the vector should be widened to 512 bits.
+ unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+ MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+ MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+ DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
+ Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+ return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
+ Mask, N->getMemoryVT(), N->getMemOperand(),
+ N->isTruncatingStore());
}
return Op;
}
@@ -18470,17 +20111,59 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
"MGATHER/MSCATTER are supported on AVX-512 arch only");
MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
- EVT VT = Op.getValueType();
- assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
SDLoc dl(Op);
-
+ MVT VT = Op.getSimpleValueType();
SDValue Index = N->getIndex();
+ SDValue Mask = N->getMask();
+ SDValue Src0 = N->getValue();
+ MVT IndexVT = Index.getSimpleValueType();
+ MVT MaskVT = Mask.getSimpleValueType();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
+
if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
- !Index.getValueType().is512BitVector()) {
- Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
- N->getOperand(3), Index };
- DAG.UpdateNodeOperands(N, Ops);
+ !Index.getSimpleValueType().is512BitVector()) {
+    // AVX512F supports only 512-bit vectors. Either the data or the index
+    // must be 512 bits wide. If both the index and the data are 256-bit but
+    // the vector contains 8 elements, just sign-extend the index.
+ if (NumElts == 8) {
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ N->getOperand(3), Index };
+ DAG.UpdateNodeOperands(N, Ops);
+ return Op;
+ }
+
+ // Minimal number of elements in Gather
+ NumElts = 8;
+ // Index
+ MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+ Index = ExtendToType(Index, NewIndexVT, DAG);
+ if (IndexVT.getScalarType() == MVT::i32)
+ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+
+ // Mask
+ MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
+    // At this point we have a promoted mask operand
+ assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+ MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+ Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+ Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
+
+ // The pass-thru value
+ MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+ Src0 = ExtendToType(Src0, NewVT, DAG);
+
+ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+ SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
+ N->getMemoryVT(), dl, Ops,
+ N->getMemOperand());
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                 NewGather.getValue(0),
+                                 DAG.getIntPtrConstant(0, dl));
+    SDValue RetOps[] = {Extract, NewGather.getValue(1)};
+ return DAG.getMergeValues(RetOps, dl);
}
return Op;
}
@@ -18572,6 +20255,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::SETCCE: return LowerSETCCE(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
@@ -18592,12 +20276,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
- case ISD::CTLZ: return LowerCTLZ(Op, DAG);
- case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
- case ISD::CTTZ: return LowerCTTZ(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, Subtarget, DAG);
+ case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, Subtarget, DAG);
+ case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
case ISD::UMUL_LOHI:
case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
+ case ISD::ROTL: return LowerRotate(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
@@ -18615,7 +20301,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN: return LowerMINMAX(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
+ case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
+ case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
case ISD::GC_TRANSITION_START:
@@ -18634,14 +20326,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case X86ISD::AVG: {
+ // Legalize types for X86ISD::AVG by expanding vectors.
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+
+ auto InVT = N->getValueType(0);
+ auto InVTSize = InVT.getSizeInBits();
+ const unsigned RegSize =
+ (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
+ assert((!Subtarget->hasAVX512() || RegSize < 512) &&
+ "512-bit vector requires AVX512");
+ assert((!Subtarget->hasAVX2() || RegSize < 256) &&
+ "256-bit vector requires AVX2");
+
+ auto ElemVT = InVT.getVectorElementType();
+ auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+ RegSize / ElemVT.getSizeInBits());
+ assert(RegSize % InVT.getSizeInBits() == 0);
+ unsigned NumConcat = RegSize / InVT.getSizeInBits();
+
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+ Ops[0] = N->getOperand(0);
+ SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+ Ops[0] = N->getOperand(1);
+ SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+
+ SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
+ Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
+ DAG.getIntPtrConstant(0, dl)));
+ return;
+ }
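// A scalar sketch of the per-lane value X86ISD::AVG computes (the rounding
// unsigned average of pavgb/pavgw), assuming 8-bit lanes:
#include <cstdint>

static uint8_t avgRoundU8(uint8_t A, uint8_t B) {
  return uint8_t((unsigned(A) + unsigned(B) + 1) >> 1);
}
// e.g. avgRoundU8(1, 2) == 2, since the sum is rounded up before halving.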
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
case X86ISD::FMAXC:
case X86ISD::FMAX: {
EVT VT = N->getValueType(0);
- if (VT != MVT::v2f32)
- llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX.");
+ assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
SDValue UNDEF = DAG.getUNDEF(VT);
SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
N->getOperand(0), UNDEF);
@@ -18668,17 +20389,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
- // FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert
- // (FP_TO_SINT (load f16)) to FP_TO_INT*.
- if (N->getOperand(0).getValueType() == MVT::f16)
- break;
- // fallthrough
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
- if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
- return;
-
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -18707,6 +20420,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
+ // TODO: Are there any fast-math-flags to propagate here?
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
return;
@@ -18740,6 +20454,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
}
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG))
+ Results.push_back(V);
+ return;
+ }
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
@@ -18748,7 +20467,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
- EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
+ MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
DAG.getConstant(0, dl, HalfT));
@@ -18884,6 +20603,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::IRET: return "X86ISD::IRET";
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
@@ -18910,6 +20630,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::ABS: return "X86ISD::ABS";
+ case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
@@ -18937,12 +20658,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VZEXT: return "X86ISD::VZEXT";
case X86ISD::VSEXT: return "X86ISD::VSEXT";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
- case X86ISD::VTRUNCM: return "X86ISD::VTRUNCM";
+ case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
+ case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
+ case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -18978,6 +20701,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
+ case X86ISD::KTEST: return "X86ISD::KTEST";
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
@@ -19000,6 +20724,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
+ case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
case X86ISD::SUBV_BROADCAST: return "X86ISD::SUBV_BROADCAST";
case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
case X86ISD::VPERMILPV: return "X86ISD::VPERMILPV";
@@ -19009,11 +20734,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
+ case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
case X86ISD::VRANGE: return "X86ISD::VRANGE";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
case X86ISD::PSADBW: return "X86ISD::PSADBW";
+ case X86ISD::DBPSADBW: return "X86ISD::DBPSADBW";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -19022,10 +20749,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SFENCE: return "X86ISD::SFENCE";
case X86ISD::LFENCE: return "X86ISD::LFENCE";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
- case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
case X86ISD::SAHF: return "X86ISD::SAHF";
case X86ISD::RDRAND: return "X86ISD::RDRAND";
case X86ISD::RDSEED: return "X86ISD::RDSEED";
+ case X86ISD::VPMADDUBSW: return "X86ISD::VPMADDUBSW";
+ case X86ISD::VPMADDWD: return "X86ISD::VPMADDWD";
+ case X86ISD::VPROT: return "X86ISD::VPROT";
+ case X86ISD::VPROTI: return "X86ISD::VPROTI";
+ case X86ISD::VPSHA: return "X86ISD::VPSHA";
+ case X86ISD::VPSHL: return "X86ISD::VPSHL";
+ case X86ISD::VPCOM: return "X86ISD::VPCOM";
+ case X86ISD::VPCOMU: return "X86ISD::VPCOMU";
case X86ISD::FMADD: return "X86ISD::FMADD";
case X86ISD::FMSUB: return "X86ISD::FMSUB";
case X86ISD::FNMADD: return "X86ISD::FNMADD";
@@ -19038,7 +20772,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
- case X86ISD::RNDSCALE: return "X86ISD::RNDSCALE";
+ case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
+ case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
+ case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
@@ -19064,6 +20800,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND";
case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND";
+ case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
+ case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
}
return nullptr;
}
@@ -19218,7 +20956,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
bool
X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
+ if (!Subtarget->hasAnyFMA())
return false;
VT = VT.getScalarType();
@@ -19253,11 +20991,11 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
return false;
// Not for i1 vectors
- if (VT.getScalarType() == MVT::i1)
+ if (VT.getSimpleVT().getScalarType() == MVT::i1)
return false;
// Very little shuffling can be done for 64-bit vectors right now.
- if (VT.getSizeInBits() == 64)
+ if (VT.getSimpleVT().getSizeInBits() == 64)
return false;
// We only care that the types being shuffled are legal. The lowering can
@@ -19282,8 +21020,7 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
DebugLoc DL = MI->getDebugLoc();
const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
+ MachineFunction::iterator I = ++MBB->getIterator();
// For the v = xbegin(), we generate
//
@@ -19531,8 +21268,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
// Insert the new basic blocks
MF->insert(MBBIter, offsetMBB);
@@ -19702,8 +21438,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
// stores were performed.
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *F = MBB->getParent();
- MachineFunction::iterator MBBIter = MBB;
- ++MBBIter;
+ MachineFunction::iterator MBBIter = ++MBB->getIterator();
MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, XMMSaveMBB);
@@ -19727,7 +21462,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
- if (!Subtarget->isTargetWin64()) {
+ if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) {
// If %al is 0, branch around the XMM save block.
BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
@@ -19744,9 +21479,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
// In the XMM save block, save all the XMM argument registers.
for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
- MachineMemOperand *MMO =
- F->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+ MachineMemOperand *MMO = F->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, /*Align=*/16);
BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
@@ -19800,6 +21534,39 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
return true;
}
+// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
+// together with other CMOV pseudo-opcodes into a single basic-block with
+// a conditional jump around it.
+static bool isCMOVPseudo(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case X86::CMOV_FR32:
+ case X86::CMOV_FR64:
+ case X86::CMOV_GR8:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
+ case X86::CMOV_V2F64:
+ case X86::CMOV_V2I64:
+ case X86::CMOV_V4F32:
+ case X86::CMOV_V4F64:
+ case X86::CMOV_V4I64:
+ case X86::CMOV_V16F32:
+ case X86::CMOV_V8F32:
+ case X86::CMOV_V8F64:
+ case X86::CMOV_V8I64:
+ case X86::CMOV_V8I1:
+ case X86::CMOV_V16I1:
+ case X86::CMOV_V32I1:
+ case X86::CMOV_V64I1:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -19811,8 +21578,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -19823,8 +21589,41 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *thisMBB = BB;
MachineFunction *F = BB->getParent();
- // We also lower double CMOVs:
+ // This code lowers all pseudo-CMOV instructions. Generally it lowers these
+ // as described above, by inserting a BB, and then making a PHI at the join
+ // point to select the true and false operands of the CMOV in the PHI.
+ //
+ // The code also handles two different cases of multiple CMOV opcodes
+ // in a row.
+ //
+ // Case 1:
+ // In this case, there are multiple CMOVs in a row, all which are based on
+ // the same condition setting (or the exact opposite condition setting).
+ // In this case we can lower all the CMOVs using a single inserted BB, and
+ // then make a number of PHIs at the join point to model the CMOVs. The only
+  // trickiness here is that in a case like:
+ //
+ // t2 = CMOV cond1 t1, f1
+ // t3 = CMOV cond1 t2, f2
+ //
+ // when rewriting this into PHIs, we have to perform some renaming on the
+ // temps since you cannot have a PHI operand refer to a PHI result earlier
+ // in the same block. The "simple" but wrong lowering would be:
+ //
+ // t2 = PHI t1(BB1), f1(BB2)
+ // t3 = PHI t2(BB1), f2(BB2)
+ //
+ // but clearly t2 is not defined in BB1, so that is incorrect. The proper
+ // renaming is to note that on the path through BB1, t2 is really just a
+ // copy of t1, and do that renaming, properly generating:
+ //
+ // t2 = PHI t1(BB1), f1(BB2)
+ // t3 = PHI t1(BB1), f2(BB2)
+ //
+  // Case 2: we lower cascaded CMOVs such as
+ //
// (CMOV (CMOV F, T, cc1), T, cc2)
+ //
// to two successives branches. For that, we look for another CMOV as the
// following instruction.
//
@@ -19890,19 +21689,42 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// .LBB5_4:
// retq
//
- MachineInstr *NextCMOV = nullptr;
+ MachineInstr *CascadedCMOV = nullptr;
+ MachineInstr *LastCMOV = MI;
+ X86::CondCode CC = X86::CondCode(MI->getOperand(3).getImm());
+ X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
MachineBasicBlock::iterator NextMIIt =
std::next(MachineBasicBlock::iterator(MI));
- if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+
+  // Check first for case 1, where there are multiple CMOVs with the same
+  // condition. Of the two cases of multiple CMOV lowerings, case 1 reduces
+  // the number of jumps the most.
+
+ if (isCMOVPseudo(MI)) {
+    // See if we have a string of CMOVs with the same condition.
+ while (NextMIIt != BB->end() &&
+ isCMOVPseudo(NextMIIt) &&
+ (NextMIIt->getOperand(3).getImm() == CC ||
+ NextMIIt->getOperand(3).getImm() == OppCC)) {
+ LastCMOV = &*NextMIIt;
+ ++NextMIIt;
+ }
+ }
+
+  // This checks for case 2, but only if we didn't already find
+  // case 1, as indicated by LastCMOV == MI.
+ if (LastCMOV == MI &&
+ NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
- NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
- NextCMOV = &*NextMIIt;
+ NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg()) {
+ CascadedCMOV = &*NextMIIt;
+ }
MachineBasicBlock *jcc1MBB = nullptr;
- // If we have a double CMOV, we lower it to two successive branches to
+ // If we have a cascaded CMOV, we lower it to two successive branches to
// the same block. EFLAGS is used by both, so mark it as live in the second.
- if (NextCMOV) {
+ if (CascadedCMOV) {
jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, jcc1MBB);
jcc1MBB->addLiveIn(X86::EFLAGS);
@@ -19917,7 +21739,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// live into the sink and copy blocks.
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+ MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
@@ -19926,12 +21748,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Add the true and fallthrough blocks as its successors.
- if (NextCMOV) {
- // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+ if (CascadedCMOV) {
+ // The fallthrough block may be jcc1MBB, if we have a cascaded CMOV.
BB->addSuccessor(jcc1MBB);
// In that case, jcc1MBB will itself fallthrough the copy0MBB, and
@@ -19946,13 +21768,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
BB->addSuccessor(sinkMBB);
// Create the conditional branch instruction.
- unsigned Opc =
- X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ unsigned Opc = X86::GetCondBranchFromCond(CC);
BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
- if (NextCMOV) {
+ if (CascadedCMOV) {
unsigned Opc2 = X86::GetCondBranchFromCond(
- (X86::CondCode)NextCMOV->getOperand(3).getImm());
+ (X86::CondCode)CascadedCMOV->getOperand(3).getImm());
BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
}
@@ -19964,28 +21785,110 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- MachineInstrBuilder MIB =
- BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
+ MachineBasicBlock::iterator MIItEnd =
+ std::next(MachineBasicBlock::iterator(LastCMOV));
+ MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin();
+ DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
+ MachineInstrBuilder MIB;
+
+ // As we are creating the PHIs, we have to be careful if there is more than
+ // one. Later CMOVs may reference the results of earlier CMOVs, but later
+ // PHIs have to reference the individual true/false inputs from earlier PHIs.
+  // That also means that PHI construction must work forward from earlier to
+  // later, and that the code must maintain a mapping from each earlier PHI's
+  // destination register to the registers that went into that PHI.
- // If we have a double CMOV, the second Jcc provides the same incoming
+ for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+ unsigned DestReg = MIIt->getOperand(0).getReg();
+ unsigned Op1Reg = MIIt->getOperand(1).getReg();
+ unsigned Op2Reg = MIIt->getOperand(2).getReg();
+
+ // If this CMOV we are generating is the opposite condition from
+ // the jump we generated, then we have to swap the operands for the
+ // PHI that is going to be generated.
+ if (MIIt->getOperand(3).getImm() == OppCC)
+ std::swap(Op1Reg, Op2Reg);
+
+ if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
+ Op1Reg = RegRewriteTable[Op1Reg].first;
+
+ if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
+ Op2Reg = RegRewriteTable[Op2Reg].second;
+
+ MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL,
+ TII->get(X86::PHI), DestReg)
+ .addReg(Op1Reg).addMBB(copy0MBB)
+ .addReg(Op2Reg).addMBB(thisMBB);
+
+ // Add this PHI to the rewrite table.
+ RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
+ }
+
+ // If we have a cascaded CMOV, the second Jcc provides the same incoming
// value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
- if (NextCMOV) {
+ if (CascadedCMOV) {
MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
// Copy the PHI result to the register defined by the second CMOV.
BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
- DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+ DL, TII->get(TargetOpcode::COPY),
+ CascadedCMOV->getOperand(0).getReg())
.addReg(MI->getOperand(0).getReg());
- NextCMOV->eraseFromParent();
+ CascadedCMOV->eraseFromParent();
}
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ // Now remove the CMOV(s).
+ for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; )
+ (MIIt++)->eraseFromParent();
+
return sinkMBB;
}
MachineBasicBlock *
+X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // Combine the following atomic floating-point modification pattern:
+ // a.store(reg OP a.load(acquire), release)
+ // Transform them into:
+ // OPss (%gpr), %xmm
+ // movss %xmm, (%gpr)
+ // Or sd equivalent for 64-bit operations.
+ unsigned MOp, FOp;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
+ case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break;
+ case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break;
+ }
+ const X86InstrInfo *TII = Subtarget->getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ MachineOperand MSrc = MI->getOperand(0);
+ unsigned VSrc = MI->getOperand(5).getReg();
+ const MachineOperand &Disp = MI->getOperand(3);
+ MachineOperand ZeroDisp = MachineOperand::CreateImm(0);
+ bool hasDisp = Disp.isGlobal() || Disp.isImm();
+ if (hasDisp && MSrc.isReg())
+ MSrc.setIsKill(false);
+ MachineInstrBuilder MIM = BuildMI(*BB, MI, DL, TII->get(MOp))
+ .addOperand(/*Base=*/MSrc)
+ .addImm(/*Scale=*/1)
+ .addReg(/*Index=*/0)
+ .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+ .addReg(0);
+ MachineInstr *MIO = BuildMI(*BB, (MachineInstr *)MIM, DL, TII->get(FOp),
+ MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
+ .addReg(VSrc)
+ .addOperand(/*Base=*/MSrc)
+ .addImm(/*Scale=*/1)
+ .addReg(/*Index=*/0)
+ .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+ .addReg(/*Segment=*/0);
+ MIM.addReg(MIO->getOperand(0).getReg(), RegState::Kill);
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
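// A source-level sketch of the pattern this lowering targets, assuming
// std::atomic<float> as the frontend shape of RELEASE_FADD32mr:
#include <atomic>

static void atomicAddRelease(std::atomic<float> &A, float Reg) {
  // a.store(reg OP a.load(acquire), release) with OP = fadd; with this
  // lowering the backend can emit addss (%gpr), %xmm ; movss %xmm, (%gpr).
  A.store(Reg + A.load(std::memory_order_acquire), std::memory_order_release);
}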
+
+MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
@@ -20032,8 +21935,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
sizeVReg = MI->getOperand(1).getReg(),
physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
- MachineFunction::iterator MBBIter = BB;
- ++MBBIter;
+ MachineFunction::iterator MBBIter = ++BB->getIterator();
MF->insert(MBBIter, bumpMBB);
MF->insert(MBBIter, mallocMBB);
@@ -20120,14 +22022,60 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
+ assert(!Subtarget->isTargetMachO());
DebugLoc DL = MI->getDebugLoc();
+ MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe(
+ *BB->getParent(), *BB, MI, DL, false);
+ MachineBasicBlock *ResumeBB = ResumeMI->getParent();
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return ResumeBB;
+}
- assert(!Subtarget->isTargetMachO());
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB();
+ DebugLoc DL = MI->getDebugLoc();
- Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI,
- DL);
+ assert(!isAsynchronousEHPersonality(
+ classifyEHPersonality(MF->getFunction()->getPersonalityFn())) &&
+ "SEH does not use catchret!");
- MI->eraseFromParent(); // The pseudo instruction is gone now.
+ // Only 32-bit EH needs to worry about manually restoring stack pointers.
+ if (!Subtarget->is32Bit())
+ return BB;
+
+ // C++ EH creates a new target block to hold the restore code, and wires up
+ // the new block to the return destination with a normal JMP_4.
+ MachineBasicBlock *RestoreMBB =
+ MF->CreateMachineBasicBlock(BB->getBasicBlock());
+ assert(BB->succ_size() == 1);
+ MF->insert(std::next(BB->getIterator()), RestoreMBB);
+ RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
+ BB->addSuccessor(RestoreMBB);
+ MI->getOperand(0).setMBB(RestoreMBB);
+
+ auto RestoreMBBI = RestoreMBB->begin();
+ BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
+ BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const Constant *PerFn = MF->getFunction()->getPersonalityFn();
+ bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
+ // Only 32-bit SEH requires special handling for catchpad.
+ if (IsSEH && Subtarget->is32Bit()) {
+ const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
+ }
+ MI->eraseFromParent();
return BB;
}
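// Illustrative sketch (not part of this patch): a minimal C++ try/catch which,
// when compiled for a 32-bit Windows target (e.g. i686-pc-windows-msvc), is
// modelled with catchpad/catchret in the IR and reaches the CATCHPAD and
// CATCHRET pseudo-instructions handled above. may_throw() is hypothetical.
int may_throw();

int catch_example() {
  try {
    return may_throw();
  } catch (int E) { // the catch funclet ends in a catchret back to the caller block
    return E;
  }
}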
@@ -20149,6 +22097,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
+ Subtarget->is64Bit() ?
+ Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() :
Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
@@ -20198,8 +22148,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
+ MachineFunction::iterator I = ++MBB->getIterator();
// Memory Reference
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -20225,7 +22174,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// For v = setjmp(buf), we generate
//
// thisMBB:
- // buf[LabelOffset] = restoreMBB
+ // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
@@ -20245,6 +22194,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MF->insert(I, mainMBB);
MF->insert(I, sinkMBB);
MF->push_back(restoreMBB);
+ restoreMBB->setHasAddressTaken();
MachineInstrBuilder MIB;
@@ -20511,35 +22461,44 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
+ case X86::CATCHRET:
+ return EmitLoweredCatchRet(MI, BB);
+ case X86::CATCHPAD:
+ return EmitLoweredCatchPad(MI, BB);
case X86::SEG_ALLOCA_32:
case X86::SEG_ALLOCA_64:
return EmitLoweredSegAlloca(MI, BB);
case X86::TLSCall_32:
case X86::TLSCall_64:
return EmitLoweredTLSCall(MI, BB);
- case X86::CMOV_GR8:
case X86::CMOV_FR32:
case X86::CMOV_FR64:
- case X86::CMOV_V4F32:
+ case X86::CMOV_FR128:
+ case X86::CMOV_GR8:
+ case X86::CMOV_GR16:
+ case X86::CMOV_GR32:
+ case X86::CMOV_RFP32:
+ case X86::CMOV_RFP64:
+ case X86::CMOV_RFP80:
case X86::CMOV_V2F64:
case X86::CMOV_V2I64:
- case X86::CMOV_V8F32:
+ case X86::CMOV_V4F32:
case X86::CMOV_V4F64:
case X86::CMOV_V4I64:
case X86::CMOV_V16F32:
+ case X86::CMOV_V8F32:
case X86::CMOV_V8F64:
case X86::CMOV_V8I64:
- case X86::CMOV_GR16:
- case X86::CMOV_GR32:
- case X86::CMOV_RFP32:
- case X86::CMOV_RFP64:
- case X86::CMOV_RFP80:
case X86::CMOV_V8I1:
case X86::CMOV_V16I1:
case X86::CMOV_V32I1:
case X86::CMOV_V64I1:
return EmitLoweredSelect(MI, BB);
+ case X86::RELEASE_FADD32mr:
+ case X86::RELEASE_FADD64mr:
+ return EmitLoweredAtomicFP(MI, BB);
+
case X86::FP32_TO_INT16_IN_MEM:
case X86::FP32_TO_INT32_IN_MEM:
case X86::FP32_TO_INT64_IN_MEM:
@@ -20793,7 +22752,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
unsigned Depth) const {
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
- return Op.getValueType().getScalarType().getSizeInBits();
+ return Op.getValueType().getScalarSizeInBits();
// Fallback case.
return 1;
@@ -20814,39 +22773,8 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
-/// same as extracting the high 128-bit part of 256-bit vector and then
-/// inserting the result into the low part of a new 256-bit vector
-static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
- if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
- SVOp->getMaskElt(j) >= 0)
- return false;
-
- return true;
-}
-
-/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
-/// same as extracting the low 128-bit part of 256-bit vector and then
-/// inserting the result into the high part of a new 256-bit vector
-static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
- if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
- SVOp->getMaskElt(j) >= 0)
- return false;
-
- return true;
-}
-
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+/// FIXME: This could be expanded to support 512 bit vectors as well.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget* Subtarget) {
@@ -20854,7 +22782,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
- EVT VT = SVOp->getValueType(0);
+ MVT VT = SVOp->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
@@ -20920,24 +22848,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return DCI.CombineTo(N, InsV);
}
- //===--------------------------------------------------------------------===//
- // Combine some shuffles into subvector extracts and inserts:
- //
-
- // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
- if (isShuffleHigh128VectorInsertLow(SVOp)) {
- SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
- return DCI.CombineTo(N, InsV);
- }
-
- // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
- if (isShuffleLow128VectorInsertHigh(SVOp)) {
- SDValue V = Extract128BitVector(V1, 0, DAG, dl);
- SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
- return DCI.CombineTo(N, InsV);
- }
-
return SDValue();
}
@@ -20966,10 +22876,22 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
MVT RootVT = Root.getSimpleValueType();
SDLoc DL(Root);
- // Just remove no-op shuffle masks.
if (Mask.size() == 1) {
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
- /*AddTo*/ true);
+ int Index = Mask[0];
+ assert((Index >= 0 || Index == SM_SentinelUndef ||
+ Index == SM_SentinelZero) &&
+ "Invalid shuffle index found!");
+
+ // We may end up with an accumulated mask of size 1 as a result of
+ // widening of shuffle operands (see function canWidenShuffleElements).
+ // If the only shuffle index is equal to SM_SentinelZero then propagate
+ // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
+ // mask, and therefore the entire chain of shuffles can be folded away.
+ if (Index == SM_SentinelZero)
+ DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL));
+ else
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
+ /*AddTo*/ true);
return true;
}
@@ -20985,7 +22907,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// doesn't preclude something switching to the shorter encoding post-RA.
//
// FIXME: Should teach these routines about AVX vector widths.
- if (FloatDomain && VT.getSizeInBits() == 128) {
+ if (FloatDomain && VT.is128BitVector()) {
if (Mask.equals({0, 0}) || Mask.equals({1, 1})) {
bool Lo = Mask.equals({0, 0});
unsigned Shuffle;
@@ -21049,7 +22971,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK
// variants as none of these have single-instruction variants that are
// superior to the UNPCK formulation.
- if (!FloatDomain && VT.getSizeInBits() == 128 &&
+ if (!FloatDomain && VT.is128BitVector() &&
(Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) ||
Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) ||
Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) ||
@@ -21226,26 +23148,28 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
// See if we can recurse into the operand to combine more things.
switch (Op.getOpcode()) {
- case X86ISD::PSHUFB:
- HasPSHUFB = true;
- case X86ISD::PSHUFD:
- case X86ISD::PSHUFHW:
- case X86ISD::PSHUFLW:
- if (Op.getOperand(0).hasOneUse() &&
- combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
- HasPSHUFB, DAG, DCI, Subtarget))
- return true;
- break;
+ case X86ISD::PSHUFB:
+ HasPSHUFB = true;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ if (Op.getOperand(0).hasOneUse() &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
- case X86ISD::UNPCKL:
- case X86ISD::UNPCKH:
- assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!");
- // We can't check for single use, we have to check that this shuffle is the only user.
- if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
- combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
- HasPSHUFB, DAG, DCI, Subtarget))
- return true;
- break;
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ assert(Op.getOperand(0) == Op.getOperand(1) &&
+ "We only combine unary shuffles!");
+ // We can't check for single use, we have to check that this shuffle is the
+ // only user.
+ if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
+ combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1,
+ HasPSHUFB, DAG, DCI, Subtarget))
+ return true;
+ break;
}
// Minor canonicalization of the accumulated shuffle mask to make it easier
@@ -21360,8 +23284,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
case X86ISD::UNPCKH:
// For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
// shuffle into a preceding word shuffle.
- if (V.getSimpleValueType().getScalarType() != MVT::i8 &&
- V.getSimpleValueType().getScalarType() != MVT::i16)
+ if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
+ V.getSimpleValueType().getVectorElementType() != MVT::i16)
return SDValue();
// Search for a half-shuffle which we can combine with.
@@ -21438,7 +23362,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
return V;
}
-/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw.
+/// \brief Search for a combinable shuffle across a chain ending in pshuflw or
+/// pshufhw.
///
/// We walk up the chain, skipping shuffles of the other half and looking
/// through shuffles which switch halves trying to find a shuffle of the same
@@ -21520,6 +23445,41 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
+ case X86ISD::UNPCKL: {
+ // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
+ // which X86ISD::UNPCKL has an ISD::UNDEF operand and the ISD::VECTOR_SHUFFLE
+ // moves the upper-half elements into the lower half. For example:
+ //
+ // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1,
+ // undef:v16i8
+ // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2
+ //
+ // will be combined to:
+ //
+ // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1
+
+ // This is only done for 128-bit vectors. From SSE4.1 onwards this combine
+ // is unlikely to fire because better instructions are available.
+ if (!VT.is128BitVector())
+ return SDValue();
+
+ auto Op0 = N.getOperand(0);
+ auto Op1 = N.getOperand(1);
+ if (Op0.getOpcode() == ISD::UNDEF &&
+ Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) {
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SmallVector<int, 8> ExpectedMask(NumElts, -1);
+ std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2,
+ NumElts / 2);
+
+ auto ShufOp = Op1.getOperand(0);
+ if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask))
+ return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp);
+ }
+ return SDValue();
+ }
default:
return SDValue();
}
@@ -21535,7 +23495,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
- assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!");
+ assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!");
if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
return SDValue(); // We combined away this shuffle, so we're done.
@@ -21624,14 +23584,19 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
return SDValue();
auto *SVN = cast<ShuffleVectorSDNode>(N);
- ArrayRef<int> Mask = SVN->getMask();
+ SmallVector<int, 8> Mask;
+ for (int M : SVN->getMask())
+ Mask.push_back(M);
+
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
- // We require the first shuffle operand to be the SUB node, and the second to
- // be the ADD node.
- // FIXME: We should support the commuted patterns.
- if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD)
+ // We require the first shuffle operand to be the FSUB node and the second to
+ // be the FADD node; if the operands are commuted, swap them and commute the
+ // shuffle mask to reach that canonical form.
+ if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(V1, V2);
+ } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
return SDValue();
// If there are other uses of these operations we can't fold them.
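// Illustrative sketch (not part of this patch): the scalar shape of the
// FADD/FSUB-plus-shuffle pattern recognized here, which maps to addsubps
// (subtract in even lanes, add in odd lanes). With the change above, the
// commuted FADD/FSUB order is also accepted by commuting the mask. Names
// are hypothetical.
void addsub4(float *R, const float *A, const float *B) {
  R[0] = A[0] - B[0]; // even lanes: FSUB
  R[1] = A[1] + B[1]; // odd lanes:  FADD
  R[2] = A[2] - B[2];
  R[3] = A[3] + B[3];
}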
@@ -21682,7 +23647,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return AddSub;
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (Subtarget->hasFp256() && VT.is256BitVector() &&
+ if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
@@ -21866,21 +23831,45 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
-/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are
-/// special and don't usually play with other vector types, it's better to
-/// handle them early to be sure we emit efficient code by avoiding
-/// store-load conversions.
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
- if (N->getValueType(0) != MVT::x86mmx ||
- N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
- N->getOperand(0)->getValueType(0) != MVT::v2i32)
- return SDValue();
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
- SDValue V = N->getOperand(0);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
- if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
- return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)),
- N->getValueType(0), V.getOperand(0));
+ // Detect bitcasts between i32 to x86mmx low word. Since MMX types are
+ // special and don't usually play with other vector types, it's better to
+ // handle them early to be sure we emit efficient code by avoiding
+ // store-load conversions.
+ if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
+ N0.getValueType() == MVT::v2i32 &&
+ isNullConstant(N0.getOperand(1))) {
+ SDValue N00 = N0->getOperand(0);
+ if (N00.getValueType() == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
+ }
+
+ // Convert a bitcasted integer logic operation that has one bitcasted
+ // floating-point operand and one constant operand into a floating-point
+ // logic operation. This may create a load of the constant, but that is
+ // cheaper than materializing the constant in an integer register and
+ // transferring it to an SSE register or transferring the SSE operand to
+ // integer register and back.
+ unsigned FPOpcode;
+ switch (N0.getOpcode()) {
+ case ISD::AND: FPOpcode = X86ISD::FAND; break;
+ case ISD::OR: FPOpcode = X86ISD::FOR; break;
+ case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
+ default: return SDValue();
+ }
+ if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
+ (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getOperand(0).getValueType() == VT) {
+ SDValue N000 = N0.getOperand(0).getOperand(0);
+ SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
+ return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+ }
return SDValue();
}
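// Illustrative sketch (not part of this patch): the source-level shape of the
// "integer logic with one bitcasted FP operand and one constant" case above.
// Clearing the sign bit through an integer view of a float is the classic
// fabs idiom; the combine keeps it as an FP logic op (FAND) so the value
// never has to leave the SSE register file. Names are hypothetical.
#include <cstdint>
#include <cstring>

float fabs_via_mask(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // bitcast f32 -> i32
  Bits &= 0x7FFFFFFFu;                  // integer AND with a constant mask
  std::memcpy(&X, &Bits, sizeof(Bits)); // bitcast i32 -> f32
  return X;
}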
@@ -21910,26 +23899,26 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
InputVector.getNode()->getOperand(0));
// The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))).
- SDValue MMXSrcOp = MMXSrc.getOperand(0);
if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() &&
- MMXSrc.getValueType() == MVT::i64 && MMXSrcOp.hasOneUse() &&
- MMXSrcOp.getOpcode() == ISD::BITCAST &&
- MMXSrcOp.getValueType() == MVT::v1i64 &&
- MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
- return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
- N->getValueType(0),
- MMXSrcOp.getOperand(0));
+ MMXSrc.getValueType() == MVT::i64) {
+ SDValue MMXSrcOp = MMXSrc.getOperand(0);
+ if (MMXSrcOp.hasOneUse() && MMXSrcOp.getOpcode() == ISD::BITCAST &&
+ MMXSrcOp.getValueType() == MVT::v1i64 &&
+ MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx)
+ return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector),
+ N->getValueType(0), MMXSrcOp.getOperand(0));
+ }
}
EVT VT = N->getValueType(0);
- if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) &&
+ if (VT == MVT::i1 && isa<ConstantSDNode>(N->getOperand(1)) &&
InputVector.getOpcode() == ISD::BITCAST &&
- dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) {
+ isa<ConstantSDNode>(InputVector.getOperand(0))) {
uint64_t ExtractedElt =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
uint64_t InputValue =
- cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
+ cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
@@ -22036,96 +24025,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match.
-static std::pair<unsigned, bool>
-matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const X86Subtarget *Subtarget) {
- if (!VT.isVector())
- return std::make_pair(0, false);
-
- bool NeedSplit = false;
- switch (VT.getSimpleVT().SimpleTy) {
- default: return std::make_pair(0, false);
- case MVT::v4i64:
- case MVT::v2i64:
- if (!Subtarget->hasVLX())
- return std::make_pair(0, false);
- break;
- case MVT::v64i8:
- case MVT::v32i16:
- if (!Subtarget->hasBWI())
- return std::make_pair(0, false);
- break;
- case MVT::v16i32:
- case MVT::v8i64:
- if (!Subtarget->hasAVX512())
- return std::make_pair(0, false);
- break;
- case MVT::v32i8:
- case MVT::v16i16:
- case MVT::v8i32:
- if (!Subtarget->hasAVX2())
- NeedSplit = true;
- if (!Subtarget->hasAVX())
- return std::make_pair(0, false);
- break;
- case MVT::v16i8:
- case MVT::v8i16:
- case MVT::v4i32:
- if (!Subtarget->hasSSE2())
- return std::make_pair(0, false);
- }
-
- // SSE2 has only a small subset of the operations.
- bool hasUnsigned = Subtarget->hasSSE41() ||
- (Subtarget->hasSSE2() && VT == MVT::v16i8);
- bool hasSigned = Subtarget->hasSSE41() ||
- (Subtarget->hasSSE2() && VT == MVT::v8i16);
-
- ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
- unsigned Opc = 0;
- // Check for x CC y ? x : y.
- if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
- DAG.isEqualTo(RHS, Cond.getOperand(1))) {
- switch (CC) {
- default: break;
- case ISD::SETULT:
- case ISD::SETULE:
- Opc = hasUnsigned ? ISD::UMIN : 0; break;
- case ISD::SETUGT:
- case ISD::SETUGE:
- Opc = hasUnsigned ? ISD::UMAX : 0; break;
- case ISD::SETLT:
- case ISD::SETLE:
- Opc = hasSigned ? ISD::SMIN : 0; break;
- case ISD::SETGT:
- case ISD::SETGE:
- Opc = hasSigned ? ISD::SMAX : 0; break;
- }
- // Check for x CC y ? y : x -- a min/max with reversed arms.
- } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
- DAG.isEqualTo(RHS, Cond.getOperand(0))) {
- switch (CC) {
- default: break;
- case ISD::SETULT:
- case ISD::SETULE:
- Opc = hasUnsigned ? ISD::UMAX : 0; break;
- case ISD::SETUGT:
- case ISD::SETUGE:
- Opc = hasUnsigned ? ISD::UMIN : 0; break;
- case ISD::SETLT:
- case ISD::SETLE:
- Opc = hasSigned ? ISD::SMAX : 0; break;
- case ISD::SETGT:
- case ISD::SETGE:
- Opc = hasSigned ? ISD::SMIN : 0; break;
- }
- }
-
- return std::make_pair(Opc, NeedSplit);
-}
-
static SDValue
transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -22189,7 +24088,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// ignored in unsafe-math mode).
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
- VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
+ VT != MVT::f80 && VT != MVT::f128 &&
+ (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -22535,32 +24435,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // Try to match a min/max vector operation.
- if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) {
- std::pair<unsigned, bool> ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget);
- unsigned Opc = ret.first;
- bool NeedSplit = ret.second;
-
- if (Opc && NeedSplit) {
- unsigned NumElems = VT.getVectorNumElements();
- // Extract the LHS vectors
- SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL);
- SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL);
-
- // Extract the RHS vectors
- SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL);
- SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL);
-
- // Create min/max for each subvector
- LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1);
- RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2);
-
- // Merge the result
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS);
- } else if (Opc)
- return DAG.getNode(Opc, DL, VT, LHS, RHS);
- }
-
// Simplify vector selection if condition value type matches vselect
// operand type
if (N->getOpcode() == ISD::VSELECT && CondVT == VT) {
@@ -22635,7 +24509,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
- unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Cond.getValueType().getScalarSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
@@ -22656,14 +24530,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// FIXME: We don't support i16-element blends currently. We could and
// should support them by making *all* the bits in the condition be set
// rather than just the high bit and using an i8-element blend.
- if (VT.getScalarType() == MVT::i16)
+ if (VT.getVectorElementType() == MVT::i16)
return SDValue();
// Dynamic blending was only available from SSE4.1 onward.
- if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41())
+ if (VT.is128BitVector() && !Subtarget->hasSSE41())
return SDValue();
// Byte blends are only available in AVX2
- if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 &&
- !Subtarget->hasAVX2())
+ if (VT == MVT::v32i8 && !Subtarget->hasAVX2())
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
@@ -22773,12 +24646,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
- ConstantSDNode *CS;
- if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
- CS->getZExtValue() == 1)
+ if (isOneConstant(SetCC.getOperand(0)))
OpIdx = 1;
- if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
- CS->getZExtValue() == 1)
+ if (isOneConstant(SetCC.getOperand(1)))
OpIdx = 0;
if (OpIdx == -1)
break;
@@ -22857,8 +24727,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
X86::CondCode &CC1, SDValue &Flags,
bool &isAnd) {
if (Cond->getOpcode() == X86ISD::CMP) {
- ConstantSDNode *CondOp1C = dyn_cast<ConstantSDNode>(Cond->getOperand(1));
- if (!CondOp1C || !CondOp1C->isNullValue())
+ if (!isNullConstant(Cond->getOperand(1)))
return false;
Cond = Cond->getOperand(0);
@@ -23102,106 +24971,15 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default: return SDValue();
- // SSE/AVX/AVX2 blend intrinsics.
- case Intrinsic::x86_avx2_pblendvb:
- // Don't try to simplify this intrinsic if we don't have AVX2.
- if (!Subtarget->hasAVX2())
- return SDValue();
- // FALL-THROUGH
- case Intrinsic::x86_avx_blendv_pd_256:
- case Intrinsic::x86_avx_blendv_ps_256:
- // Don't try to simplify this intrinsic if we don't have AVX.
- if (!Subtarget->hasAVX())
- return SDValue();
- // FALL-THROUGH
- case Intrinsic::x86_sse41_blendvps:
- case Intrinsic::x86_sse41_blendvpd:
- case Intrinsic::x86_sse41_pblendvb: {
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(2);
- SDValue Mask = N->getOperand(3);
-
- // Don't try to simplify this intrinsic if we don't have SSE4.1.
- if (!Subtarget->hasSSE41())
- return SDValue();
-
- // fold (blend A, A, Mask) -> A
- if (Op0 == Op1)
- return Op0;
- // fold (blend A, B, allZeros) -> A
- if (ISD::isBuildVectorAllZeros(Mask.getNode()))
- return Op0;
- // fold (blend A, B, allOnes) -> B
- if (ISD::isBuildVectorAllOnes(Mask.getNode()))
- return Op1;
-
- // Simplify the case where the mask is a constant i32 value.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
- if (C->isNullValue())
- return Op0;
- if (C->isAllOnesValue())
- return Op1;
- }
-
- return SDValue();
- }
-
- // Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- case Intrinsic::x86_avx2_psrai_w:
- case Intrinsic::x86_avx2_psrai_d:
- case Intrinsic::x86_sse2_psra_w:
- case Intrinsic::x86_sse2_psra_d:
- case Intrinsic::x86_avx2_psra_w:
- case Intrinsic::x86_avx2_psra_d: {
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(2);
- EVT VT = Op0.getValueType();
- assert(VT.isVector() && "Expected a vector type!");
-
- if (isa<BuildVectorSDNode>(Op1))
- Op1 = Op1.getOperand(0);
-
- if (!isa<ConstantSDNode>(Op1))
- return SDValue();
-
- EVT SVT = VT.getVectorElementType();
- unsigned SVTBits = SVT.getSizeInBits();
-
- ConstantSDNode *CND = cast<ConstantSDNode>(Op1);
- const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue());
- uint64_t ShAmt = C.getZExtValue();
-
- // Don't try to convert this shift into a ISD::SRA if the shift
- // count is bigger than or equal to the element size.
- if (ShAmt >= SVTBits)
- return SDValue();
-
- // Trivial case: if the shift count is zero, then fold this
- // into the first operand.
- if (ShAmt == 0)
- return Op0;
-
- // Replace this packed shift intrinsic with a target independent
- // shift dag node.
- SDLoc DL(N);
- SDValue Splat = DAG.getConstant(C, DL, VT);
- return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat);
- }
- }
-}
-
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
+ // An imul is usually smaller than the alternative sequence.
+ if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
@@ -23228,9 +25006,11 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
MulAmt1 = 3;
MulAmt2 = MulAmt / 3;
}
+
+ SDLoc DL(N);
+ SDValue NewMul;
if (MulAmt2 &&
(isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){
- SDLoc DL(N);
if (isPowerOf2_64(MulAmt2) &&
!(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
@@ -23239,7 +25019,6 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
// is an add.
std::swap(MulAmt1, MulAmt2);
- SDValue NewMul;
if (isPowerOf2_64(MulAmt1))
NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
@@ -23253,10 +25032,31 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
else
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
DAG.getConstant(MulAmt2, DL, VT));
+ }
+
+ if (!NewMul) {
+ assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
+ && "Both cases that could cause potential overflows should have "
+ "already been handled.");
+ if (isPowerOf2_64(MulAmt - 1))
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt - 1), DL,
+ MVT::i8)));
+ else if (isPowerOf2_64(MulAmt + 1))
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
+ N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt + 1),
+ DL, MVT::i8)), N->getOperand(0));
+ }
+
+ if (NewMul)
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, NewMul, false);
- }
+
return SDValue();
}
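// Illustrative sketch (not part of this patch): the scalar identities behind
// the new (mul x, 2^N +/- 1) handling above. Function names are hypothetical;
// each returns the same value as the corresponding plain multiply.
unsigned mul_by_9(unsigned X) { // 9 = 2^3 + 1
  return (X << 3) + X;          // (mul x, 2^N + 1) => (add (shl x, N), x)
}

unsigned mul_by_7(unsigned X) { // 7 = 2^3 - 1
  return (X << 3) - X;          // (mul x, 2^N - 1) => (sub (shl x, N), x)
}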
@@ -23272,18 +25072,34 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
- ((N00.getOpcode() == ISD::ANY_EXTEND ||
- N00.getOpcode() == ISD::ZERO_EXTEND) &&
- N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
- APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- APInt ShAmt = N1C->getAPIntValue();
- Mask = Mask.shl(ShAmt);
- if (Mask != 0) {
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT,
- N00, DAG.getConstant(Mask, DL, VT));
- }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ bool MaskOK = false;
+ // We can handle cases concerning bit-widening nodes containing setcc_c if
+ // we carefully interrogate the mask to make sure we are semantics
+ // preserving.
+ // The transform is not safe if the result of C1 << C2 exceeds the bitwidth
+ // of the underlying setcc_c operation if the setcc_c was zero extended.
+ // Consider the following example:
+ // zext(setcc_c) -> i32 0x0000FFFF
+ // c1 -> i32 0x0000FFFF
+ // c2 -> i32 0x00000001
+ // (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE
+ // (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
+ MaskOK = true;
+ } else if (N00.getOpcode() == ISD::SIGN_EXTEND &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ MaskOK = true;
+ } else if ((N00.getOpcode() == ISD::ZERO_EXTEND ||
+ N00.getOpcode() == ISD::ANY_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits());
+ }
+ if (MaskOK && Mask != 0) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));
}
}
@@ -23304,6 +25120,59 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned Size = VT.getSizeInBits();
+
+ // fold (ashr (shl, a, [56,48,32,24,16]), SarConst)
+ // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or
+ // into (lshr, (sext (a), SarConst - [56,48,32,24,16]))
+ // depending on sign of (SarConst - [56,48,32,24,16])
+
+ // sexts in X86 are MOVs. The MOVs have the same code size
+ // as above SHIFTs (only SHIFT on 1 has lower code size).
+ // However the MOVs have 2 advantages to a SHIFT:
+ // 1. MOVs can write to a register that differs from source
+ // 2. MOVs accept memory operands
+
+ if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant ||
+ N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
+ N0.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
+ APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
+ EVT CVT = N1.getValueType();
+
+ if (SarConst.isNegative())
+ return SDValue();
+
+ for (MVT SVT : MVT::integer_valuetypes()) {
+ unsigned ShiftSize = SVT.getSizeInBits();
+ // skipping types without corresponding sext/zext and
+ // ShlConst that is not one of [56,48,32,24,16]
+ if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize)
+ continue;
+ SDLoc DL(N);
+ SDValue NN =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
+ SarConst = SarConst - (Size - ShiftSize);
+ if (SarConst == 0)
+ return NN;
+ else if (SarConst.isNegative())
+ return DAG.getNode(ISD::SHL, DL, VT, NN,
+ DAG.getConstant(-SarConst, DL, CVT));
+ else
+ return DAG.getNode(ISD::SRA, DL, VT, NN,
+ DAG.getConstant(SarConst, DL, CVT));
+ }
+ return SDValue();
+}
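// Illustrative sketch (not part of this patch): the scalar equivalence behind
// PerformSRACombine, assuming two's-complement arithmetic shifts. Both
// functions compute the same value; the second uses a sign extension (a
// MOVSX on x86) plus a smaller arithmetic shift. Names are hypothetical.
#include <cstdint>

int32_t sra_of_shl(int32_t A) {
  // (sra (shl a, 24), 26); the cast avoids signed-overflow UB on the shl.
  return int32_t(uint32_t(A) << 24) >> 26;
}

int32_t sext_then_sra(int32_t A) {
  // (sra (sext_in_reg a, i8), 26 - 24)
  return int32_t(int8_t(A)) >> 2;
}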
+
/// \brief Returns a vector of 0s if the node in input is a vector logical
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
@@ -23321,14 +25190,15 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
APInt ShiftAmt = AmtSplat->getAPIntValue();
- unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
+ unsigned MaxAmount =
+ VT.getSimpleVT().getVectorElementType().getSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
// if the shift amount is bigger than or equal to
// the element size. The constant shift amount will be
// encoded as a 8-bit immediate.
if (ShiftAmt.trunc(8).uge(MaxAmount))
- return getZeroVector(VT, Subtarget, DAG, DL);
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL);
}
return SDValue();
@@ -23342,6 +25212,10 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
if (SDValue V = PerformSHLCombine(N, DAG))
return V;
+ if (N->getOpcode() == ISD::SRA)
+ if (SDValue V = PerformSRACombine(N, DAG))
+ return V;
+
// Try to fold this logical shift into a zero vector.
if (N->getOpcode() != ISD::SRA)
if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
@@ -23537,7 +25411,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
if (RHSConstSplat) {
- N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(),
SDValue(RHSConstSplat, 0));
SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
@@ -23552,9 +25426,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
case ISD::ANY_EXTEND:
return Op;
case ISD::ZERO_EXTEND: {
- unsigned InBits = NarrowVT.getScalarType().getSizeInBits();
+ unsigned InBits = NarrowVT.getScalarSizeInBits();
APInt Mask = APInt::getAllOnesValue(InBits);
- Mask = Mask.zext(VT.getScalarType().getSizeInBits());
+ Mask = Mask.zext(VT.getScalarSizeInBits());
return DAG.getNode(ISD::AND, DL, VT,
Op, DAG.getConstant(Mask, DL, VT));
}
@@ -23656,6 +25530,41 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(N0.getValueType(), NewShuffle);
}
+/// If both input operands of a logic op are being cast from floating point
+/// types, try to convert this into a floating point logic node to avoid
+/// unnecessary moves from SSE to integer registers.
+static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ unsigned FPOpcode = ISD::DELETED_NODE;
+ if (N->getOpcode() == ISD::AND)
+ FPOpcode = X86ISD::FAND;
+ else if (N->getOpcode() == ISD::OR)
+ FPOpcode = X86ISD::FOR;
+ else if (N->getOpcode() == ISD::XOR)
+ FPOpcode = X86ISD::FXOR;
+
+ assert(FPOpcode != ISD::DELETED_NODE &&
+ "Unexpected input node for FP logic conversion");
+
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST &&
+ ((Subtarget->hasSSE1() && VT == MVT::i32) ||
+ (Subtarget->hasSSE2() && VT == MVT::i64))) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N10 = N1.getOperand(0);
+ EVT N00Type = N00.getValueType();
+ EVT N10Type = N10.getValueType();
+ if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) {
+ SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+ return DAG.getBitcast(VT, FPLogic);
+ }
+ }
+ return SDValue();
+}
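// Illustrative sketch (not part of this patch): an i32 logic op whose operands
// are both bitcasts from f32, the case convertIntLogicToFPLogic turns into an
// FP logic node (here FXOR) so the values stay in the SSE domain. The sign
// bit of the xor says whether the two floats have different signs. Names are
// hypothetical.
#include <cstdint>
#include <cstring>

bool signs_differ(float A, float B) {
  uint32_t ABits, BBits;
  std::memcpy(&ABits, &A, sizeof(ABits)); // bitcast f32 -> i32
  std::memcpy(&BBits, &B, sizeof(BBits)); // bitcast f32 -> i32
  return int32_t(ABits ^ BBits) < 0;      // i32 xor of two FP bitcasts
}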
+
static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -23668,6 +25577,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -23728,6 +25640,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -23799,7 +25714,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (!Subtarget->hasSSE41())
return SDValue();
- EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+ MVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
X = DAG.getBitcast(BlendVT, X);
Y = DAG.getBitcast(BlendVT, Y);
@@ -23813,9 +25728,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
- MachineFunction &MF = DAG.getMachineFunction();
- bool OptForSize =
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
// SHLD/SHRD instructions have lower register pressure, but on some
// platforms they have higher latency than the equivalent
@@ -23913,17 +25826,188 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
+// Try to turn tests against the signbit in the form of:
+// XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
+// into:
+// SETGT(X, -1)
+static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
+ // This is only worth doing if the output type is i8.
+ if (N->getValueType(0) != MVT::i8)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // We should be performing an xor against a truncated shift.
+ if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
+ return SDValue();
+
+ // Make sure we are performing an xor against one.
+ if (!isOneConstant(N1))
+ return SDValue();
+
+ // SetCC on x86 zero extends so only act on this if it's a logical shift.
+ SDValue Shift = N0.getOperand(0);
+ if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse())
+ return SDValue();
+
+ // Make sure we are truncating from one of i16, i32 or i64.
+ EVT ShiftTy = Shift.getValueType();
+ if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
+ return SDValue();
+
+ // Make sure the shift amount extracts the sign bit.
+ if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1)
+ return SDValue();
+
+ // Create a greater-than comparison against -1.
+ // N.B. Using SETGE against 0 works but we want a canonical-looking
+ // comparison, and using SETGT matches up with what TranslateX86CC does.
+ SDLoc DL(N);
+ SDValue ShiftOp = Shift.getOperand(0);
+ EVT ShiftOpTy = ShiftOp.getValueType();
+ SDValue Cond = DAG.getSetCC(DL, MVT::i8, ShiftOp,
+ DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);
+ return Cond;
+}
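// Illustrative sketch (not part of this patch): the source-level sign-bit test
// that foldXorTruncShiftIntoCmp rewrites; the xor-of-truncated-shift form below
// produces the same i8 value as "X > -1", which becomes a single cmp/setg.
// The function name is hypothetical.
#include <cstdint>

uint8_t is_non_negative(int32_t X) {
  // XOR(TRUNCATE(SRL(X, 31)), 1)  ==>  SETGT(X, -1)
  return uint8_t(uint32_t(X) >> 31) ^ 1;
}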
+
static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
+ return RV;
+
if (Subtarget->hasCMov())
if (SDValue RV = performIntegerAbsCombine(N, DAG))
return RV;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
+ return SDValue();
+}
+
+/// This function detects the AVG pattern between vectors of unsigned i8/i16,
+/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
+/// X86ISD::AVG instruction.
+static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget, SDLoc DL) {
+ if (!VT.isVector() || !VT.isSimple())
+ return SDValue();
+ EVT InVT = In.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ EVT ScalarVT = VT.getVectorElementType();
+ if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
+ isPowerOf2_32(NumElems)))
+ return SDValue();
+
+ // InScalarVT is the intermediate type in the AVG pattern and it should be
+ // wider than the original input type (i8/i16).
+ EVT InScalarVT = InVT.getVectorElementType();
+ if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
+ return SDValue();
+
+ if (Subtarget->hasAVX512()) {
+ if (VT.getSizeInBits() > 512)
+ return SDValue();
+ } else if (Subtarget->hasAVX2()) {
+ if (VT.getSizeInBits() > 256)
+ return SDValue();
+ } else {
+ if (VT.getSizeInBits() > 128)
+ return SDValue();
+ }
+
+ // Detect the following pattern:
+ //
+ // %1 = zext <N x i8> %a to <N x i32>
+ // %2 = zext <N x i8> %b to <N x i32>
+ // %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
+ // %4 = add nuw nsw <N x i32> %3, %2
+ // %5 = lshr <N x i32> %4, <i32 1 x N>
+ // %6 = trunc <N x i32> %5 to <N x i8>
+ //
+ // In AVX512, the last instruction can also be a trunc store.
+
+ if (In.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // A lambda that checks whether the given SDValue is a constant vector with
+ // every element in the range [Min, Max].
+ auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
+ if (!BV || !BV->isConstant())
+ return false;
+ for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i));
+ if (!C)
+ return false;
+ uint64_t Val = C->getZExtValue();
+ if (Val < Min || Val > Max)
+ return false;
+ }
+ return true;
+ };
+
+ // Check that each element of the vector is logically right-shifted by one.
+ auto LHS = In.getOperand(0);
+ auto RHS = In.getOperand(1);
+ if (!IsConstVectorInRange(RHS, 1, 1))
+ return SDValue();
+ if (LHS.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ // Detect a pattern of a + b + 1 where the order doesn't matter.
+ SDValue Operands[3];
+ Operands[0] = LHS.getOperand(0);
+ Operands[1] = LHS.getOperand(1);
+
+ // Take care of the case when one of the operands is a constant vector whose
+ // elements are all in the range [1, 256] (or [1, 65536] for i16).
+ if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
+ Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
+ Operands[0].getOperand(0).getValueType() == VT) {
+ // The pattern is detected. Subtract one from the constant vector, then
+ // demote it and emit X86ISD::AVG instruction.
+ SDValue One = DAG.getConstant(1, DL, InScalarVT);
+ SDValue Ones = DAG.getNode(ISD::BUILD_VECTOR, DL, InVT,
+ SmallVector<SDValue, 8>(NumElems, One));
+ Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], Ones);
+ Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
+ return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+ Operands[1]);
+ }
+
+ if (Operands[0].getOpcode() == ISD::ADD)
+ std::swap(Operands[0], Operands[1]);
+ else if (Operands[1].getOpcode() != ISD::ADD)
+ return SDValue();
+ Operands[2] = Operands[1].getOperand(0);
+ Operands[1] = Operands[1].getOperand(1);
+
+ // Now we have three operands of two additions. Check that one of them is a
+ // constant vector with ones, and the other two are promoted from i8/i16.
+ for (int i = 0; i < 3; ++i) {
+ if (!IsConstVectorInRange(Operands[i], 1, 1))
+ continue;
+ std::swap(Operands[i], Operands[2]);
+
+ // Check if Operands[0] and Operands[1] are results of type promotion.
+ for (int j = 0; j < 2; ++j)
+ if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
+ Operands[j].getOperand(0).getValueType() != VT)
+ return SDValue();
+
+ // The pattern is detected, emit X86ISD::AVG instruction.
+ return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+ Operands[1].getOperand(0));
+ }
+
return SDValue();
}
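// Illustrative sketch (not part of this patch): a rounding-average loop whose
// vectorized form contains the zext/add/add-1/lshr/trunc chain that
// detectAVGPattern matches and turns into X86ISD::AVG (pavgb/pavgw). Names
// are hypothetical.
#include <cstddef>
#include <cstdint>

void average_u8(uint8_t *C, const uint8_t *A, const uint8_t *B, size_t N) {
  for (size_t I = 0; I < N; ++I)
    C[I] = uint8_t((unsigned(A[I]) + unsigned(B[I]) + 1) >> 1); // (a+b+1)/2
}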
@@ -23940,10 +26024,13 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// For chips with slow 32-byte unaligned loads, break the 32-byte operation
// into two 16-byte operations.
ISD::LoadExtType Ext = Ld->getExtensionType();
+ bool Fast;
+ unsigned AddressSpace = Ld->getAddressSpace();
unsigned Alignment = Ld->getAlignment();
- bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
- if (RegVT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
- !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
+ if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
+ Ext == ISD::NON_EXTLOAD &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
+ AddressSpace, Alignment, &Fast) && !Fast) {
unsigned NumElems = RegVT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
@@ -24012,8 +26099,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
- assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
- && "WideVecVT should be legal");
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
+ "WideVecVT should be legal");
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
}
@@ -24026,8 +26113,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
for (unsigned i = 0; i != NumElems; ++i)
ShuffleVec[i] = i * SizeRatio;
- for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
- ShuffleVec[i] = NumElems*SizeRatio;
+ for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i)
+ ShuffleVec[i] = NumElems * SizeRatio;
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
DAG.getConstant(0, dl, WideVecVT),
&ShuffleVec[0]);
@@ -24055,7 +26142,6 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::NON_EXTLOAD);
SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
-
}
/// PerformMSTORECombine - Resolve truncating stores
static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
@@ -24073,6 +26159,15 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // The truncating store is legal in some cases. For example,
+ // vpmovqb, vpmovqw, vpmovqd, vpmovdb and vpmovdw
+ // are truncating stores provided by AVX-512; in those cases we don't need
+ // any further transformation.
+ if (TLI.isTruncStoreLegal(VT, StVT))
+ return SDValue();
+
// From, To sizes and ElemCount must be pow of two
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for truncating masked store");
@@ -24096,12 +26191,12 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
ShuffleVec[i] = i * SizeRatio;
// Can't shuffle using an illegal type.
- assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT)
- && "WideVecVT should be legal");
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
+ "WideVecVT should be legal");
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
- DAG.getUNDEF(WideVecVT),
- &ShuffleVec[0]);
+ DAG.getUNDEF(WideVecVT),
+ &ShuffleVec[0]);
SDValue NewMask;
SDValue Mask = Mst->getMask();
@@ -24133,8 +26228,9 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
}
- return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
- NewMask, StVT, Mst->getMemOperand(), false);
+ return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal,
+ Mst->getBasePtr(), NewMask, StVT,
+ Mst->getMemOperand(), false);
}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
@@ -24148,10 +26244,12 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// If we are saving a concatenation of two XMM registers and 32-byte stores
// are slow, such as on Sandy Bridge, perform two 16-byte stores.
+ bool Fast;
+ unsigned AddressSpace = St->getAddressSpace();
unsigned Alignment = St->getAlignment();
- bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
- if (VT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
- StVT == VT && !IsAligned) {
+ if (VT.is256BitVector() && StVT == VT &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ AddressSpace, Alignment, &Fast) && !Fast) {
unsigned NumElems = VT.getVectorNumElements();
if (NumElems < 2)
return SDValue();
@@ -24178,12 +26276,29 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
if (St->isTruncatingStore() && VT.isVector()) {
+ // Check if we can detect an AVG pattern from the truncation. If yes,
+ // replace the trunc store by a normal store with the result of X86ISD::AVG
+ // instruction.
+ SDValue Avg =
+ detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, Subtarget, dl);
+ if (Avg.getNode())
+ return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+ // The truncating store is legal in some cases. For example,
+ // vpmovqb, vpmovqw, vpmovqd, vpmovdb and vpmovdw
+ // are truncating stores provided by AVX-512; in those cases we don't need
+ // any further transformation.
+ if (TLI.isTruncStoreLegal(VT, StVT))
+ return SDValue();
+
// From, To sizes and ElemCount must be pow of two
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
// We are going to use the original vector elt for storing.
@@ -24306,7 +26421,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
// Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
// pair instead.
if (Subtarget->is64Bit() || F64IsLegal) {
- EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
+ MVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
Ld->isNonTemporal(), Ld->isInvariant(),
@@ -24539,8 +26654,234 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Truncate a group of v4i32/v2i64 vectors into v16i8/v8i16 using X86ISD::PACKUS.
+static SDValue
+combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
+ SmallVector<SDValue, 8> &Regs) {
+ assert(Regs.size() > 0 && (Regs[0].getValueType() == MVT::v4i32 ||
+ Regs[0].getValueType() == MVT::v2i64));
+ EVT OutVT = N->getValueType(0);
+ EVT OutSVT = OutVT.getVectorElementType();
+ EVT InVT = Regs[0].getValueType();
+ EVT InSVT = InVT.getVectorElementType();
+ SDLoc DL(N);
+
+ // First, use a mask to clear all bits that won't appear in the result.
+ assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) &&
+ "OutSVT can only be either i8 or i16.");
+ SDValue MaskVal =
+ DAG.getConstant(OutSVT == MVT::i8 ? 0xFF : 0xFFFF, DL, InSVT);
+ SDValue MaskVec = DAG.getNode(
+ ISD::BUILD_VECTOR, DL, InVT,
+ SmallVector<SDValue, 8>(InVT.getVectorNumElements(), MaskVal));
+ for (auto &Reg : Regs)
+ Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVec, Reg);
+
+ MVT UnpackedVT, PackedVT;
+ if (OutSVT == MVT::i8) {
+ UnpackedVT = MVT::v8i16;
+ PackedVT = MVT::v16i8;
+ } else {
+ UnpackedVT = MVT::v4i32;
+ PackedVT = MVT::v8i16;
+ }
+
+ // In each iteration, halve the element size of the vector type.
+ auto RegNum = Regs.size();
+ for (unsigned j = 1, e = InSVT.getSizeInBits() / OutSVT.getSizeInBits();
+ j < e; j *= 2, RegNum /= 2) {
+ for (unsigned i = 0; i < RegNum; i++)
+ Regs[i] = DAG.getNode(ISD::BITCAST, DL, UnpackedVT, Regs[i]);
+ for (unsigned i = 0; i < RegNum / 2; i++)
+ Regs[i] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[i * 2],
+ Regs[i * 2 + 1]);
+ }
+
+ // If the type of the result is v8i8, we need to do one more X86ISD::PACKUS
+ // and then extract a subvector as the result, since v8i8 is not a legal type.
+ if (OutVT == MVT::v8i8) {
+ Regs[0] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[0], Regs[0]);
+ Regs[0] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Regs[0],
+ DAG.getIntPtrConstant(0, DL));
+ return Regs[0];
+ } else if (RegNum > 1) {
+ Regs.resize(RegNum);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
+ } else
+ return Regs[0];
+}
+
+/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
+static SDValue
+combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
+ SmallVector<SDValue, 8> &Regs) {
+ assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32);
+ EVT OutVT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Shift left by 16 bits, then arithmetic-shift right by 16 bits.
+ SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32);
+ for (auto &Reg : Regs) {
+ Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG);
+ Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG);
+ }
+
+ for (unsigned i = 0, e = Regs.size() / 2; i < e; i++)
+ Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2],
+ Regs[i * 2 + 1]);
+
+ if (Regs.size() > 2) {
+ Regs.resize(Regs.size() / 2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs);
+ } else
+ return Regs[0];
+}
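// Illustrative sketch, not part of this patch: a scalar model of the
// shift-then-PACKSS step above for one i32 lane (names are invented).
#include <cstdint>

static int16_t packss16(int32_t V) {          // signed saturation to 16 bits
  if (V > INT16_MAX) return INT16_MAX;
  if (V < INT16_MIN) return INT16_MIN;
  return static_cast<int16_t>(V);
}

int16_t truncateLaneViaPackss(int32_t V) {
  // VSHLI/VSRAI by 16 sign-extend the low 16 bits of the lane, so the value
  // already lies in [-32768, 32767] and the saturating pack cannot clamp.
  int32_t SignExtended = ((V & 0xFFFF) ^ 0x8000) - 0x8000; // portable sext of low 16 bits
  return packss16(SignExtended);
}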
+
+/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
+/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
+/// legalization the truncation will be translated into a BUILD_VECTOR with each
+/// element extracted from a vector and then truncated, and it is difficult
+/// to perform this optimization on that form.
+static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT OutVT = N->getValueType(0);
+ if (!OutVT.isVector())
+ return SDValue();
+
+ SDValue In = N->getOperand(0);
+ if (!In.getValueType().isSimple())
+ return SDValue();
+
+ EVT InVT = In.getValueType();
+ unsigned NumElems = OutVT.getVectorNumElements();
+
+ // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
+ // SSE2, and we need to take care of it specially.
+ // AVX512 provides vpmovdb.
+ if (!Subtarget->hasSSE2() || Subtarget->hasAVX2())
+ return SDValue();
+
+ EVT OutSVT = OutVT.getVectorElementType();
+ EVT InSVT = InVT.getVectorElementType();
+ if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
+ (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
+ NumElems >= 8))
+ return SDValue();
+
+  // SSSE3's pshufb results in fewer instructions in the cases below.
+ if (Subtarget->hasSSSE3() && NumElems == 8 &&
+ ((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
+ (InSVT == MVT::i32 && OutSVT == MVT::i16)))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // Split a long vector into vectors of legal type.
+ unsigned RegNum = InVT.getSizeInBits() / 128;
+ SmallVector<SDValue, 8> SubVec(RegNum);
+ if (InSVT == MVT::i32) {
+ for (unsigned i = 0; i < RegNum; i++)
+ SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+ DAG.getIntPtrConstant(i * 4, DL));
+ } else {
+ for (unsigned i = 0; i < RegNum; i++)
+ SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+ DAG.getIntPtrConstant(i * 2, DL));
+ }
+
+  // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
+ // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
+ // truncate 2 x v4i32 to v8i16.
+ if (Subtarget->hasSSE41() || OutSVT == MVT::i8)
+ return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
+ else if (InSVT == MVT::i32)
+ return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
+ else
+ return SDValue();
+}
+
+static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // Try to detect AVG pattern first.
+ SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG,
+ Subtarget, SDLoc(N));
+ if (Avg.getNode())
+ return Avg;
+
+ return combineVectorTruncation(N, DAG, Subtarget);
+}
+
+/// Do target-specific dag combines on floating point negations.
+static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ SDValue Arg = N->getOperand(0);
+ SDLoc DL(N);
+
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // If we're negating a FMUL node on a target with FMA, then we can avoid the
+ // use of a constant by performing (-0 - A*B) instead.
+  // FIXME: Check rounding control flags as well once they become available.
+ if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
+ Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
+ SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+ return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Zero);
+ }
+
+ // If we're negating a FMA node, then we can adjust the
+ // instruction to include the extra negation.
+ if (Arg.hasOneUse()) {
+ switch (Arg.getOpcode()) {
+ case X86ISD::FMADD:
+ return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FMSUB:
+ return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FNMADD:
+ return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ case X86ISD::FNMSUB:
+ return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Arg.getOperand(2));
+ }
+ }
+ return SDValue();
+}
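// Illustrative sketch, not part of this patch: the scalar identities behind
// the FNEG(FMA) rewrites above, checked with std::fma (single rounding).
#include <cassert>
#include <cmath>

int main() {
  double A = 1.5, B = -2.25, C = 4.0;
  // -(A*B + C) == FNMSUB(A, B, C) == -(A*B) - C
  assert(-std::fma(A, B, C) == std::fma(-A, B, -C));
  // -(A*B - C) == FNMADD(A, B, C) == -(A*B) + C
  assert(-std::fma(A, B, -C) == std::fma(-A, B, C));
  // -(A*B)     == FNMSUB(A, B, 0): the (-0 - A*B) form used for the FMUL case
  assert(-(A * B) == std::fma(-A, B, -0.0));
  return 0;
}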
+
+static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+    // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
+ // These logic operations may be executed in the integer domain.
+ SDLoc dl(N);
+ MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
+ unsigned IntOpcode = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+ }
+ SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+ return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
+ }
+ return SDValue();
+}
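// Illustrative sketch, not part of this patch: the bitcast trick used above,
// shown on a scalar float with std::memcpy standing in for ISD::BITCAST.
#include <cstdint>
#include <cstring>

float fandScalar(float A, float B) {
  uint32_t AI, BI;
  std::memcpy(&AI, &A, sizeof(float));
  std::memcpy(&BI, &B, sizeof(float));
  uint32_t RI = AI & BI;            // the FP AND, executed in the integer domain
  float R;
  std::memcpy(&R, &RI, sizeof(float));
  return R;                         // bit pattern identical to FAND(A, B)
}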
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
-static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
@@ -24552,7 +26893,8 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
- return SDValue();
+
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
@@ -24576,8 +26918,65 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
N->getOperand(0), N->getOperand(1));
}
+static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ if (Subtarget->useSoftFloat())
+ return SDValue();
+
+ // TODO: Check for global or instruction-level "nnan". In that case, we
+ // should be able to lower to FMAX/FMIN alone.
+ // TODO: If an operand is already known to be a NaN or not a NaN, this
+ // should be an optional swap and FMAX/FMIN.
+
+ EVT VT = N->getValueType(0);
+ if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))))
+ return SDValue();
+
+ // This takes at least 3 instructions, so favor a library call when operating
+ // on a scalar and minimizing code size.
+ if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize())
+ return SDValue();
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDLoc DL(N);
+ EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ // There are 4 possibilities involving NaN inputs, and these are the required
+ // outputs:
+ // Op1
+ // Num NaN
+ // ----------------
+ // Num | Max | Op0 |
+ // Op0 ----------------
+ // NaN | Op1 | NaN |
+ // ----------------
+ //
+ // The SSE FP max/min instructions were not designed for this case, but rather
+ // to implement:
+ // Min = Op1 < Op0 ? Op1 : Op0
+ // Max = Op1 > Op0 ? Op1 : Op0
+ //
+ // So they always return Op0 if either input is a NaN. However, we can still
+ // use those instructions for fmaxnum by selecting away a NaN input.
+
+ // If either operand is NaN, the 2nd source operand (Op0) is passed through.
+ auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;
+ SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
+ SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType , Op0, Op0, ISD::SETUO);
+
+ // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands
+ // are NaN, the NaN value of Op1 is the result.
+ auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+ return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax);
+}
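// Illustrative sketch, not part of this patch: a scalar model of the fmaxnum
// lowering above; sseMax mimics the MAXPS operand order (a NaN on either side
// makes the compare false, so the second source, Op0, is returned).
#include <cmath>

static double sseMax(double Op1, double Op0) {
  return Op1 > Op0 ? Op1 : Op0;
}

double fmaxnumLowered(double Op0, double Op1) {
  double MinOrMax = sseMax(Op1, Op0);
  // If Op0 is NaN, select Op1; otherwise take the SSE max. If both inputs are
  // NaN, Op1 (itself NaN) is the result, matching the table in the comment.
  return std::isnan(Op0) ? Op1 : MinOrMax;
}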
+
/// Do target-specific dag combines on X86ISD::FAND nodes.
-static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FAND(0.0, x) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -24588,11 +26987,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FANDN nodes
-static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -24603,7 +27003,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
static SDValue PerformBTCombine(SDNode *N,
@@ -24673,6 +27073,57 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
+/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities
+/// to combine math ops, use an LEA, or use a complex addressing mode. This can
+/// eliminate extend, add, and shift instructions.
+static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ // TODO: This should be valid for other integer types.
+ EVT VT = Sext->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ // We need an 'add nsw' feeding into the 'sext'.
+ SDValue Add = Sext->getOperand(0);
+ if (Add.getOpcode() != ISD::ADD || !Add->getFlags()->hasNoSignedWrap())
+ return SDValue();
+
+ // Having a constant operand to the 'add' ensures that we are not increasing
+ // the instruction count because the constant is extended for free below.
+ // A constant operand can also become the displacement field of an LEA.
+ auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+ if (!AddOp1)
+ return SDValue();
+
+ // Don't make the 'add' bigger if there's no hope of combining it with some
+ // other 'add' or 'shl' instruction.
+ // TODO: It may be profitable to generate simpler LEA instructions in place
+ // of single 'add' instructions, but the cost model for selecting an LEA
+ // currently has a high threshold.
+ bool HasLEAPotential = false;
+ for (auto *User : Sext->uses()) {
+ if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
+ HasLEAPotential = true;
+ break;
+ }
+ }
+ if (!HasLEAPotential)
+ return SDValue();
+
+ // Everything looks good, so pull the 'sext' ahead of the 'add'.
+ int64_t AddConstant = AddOp1->getSExtValue();
+ SDValue AddOp0 = Add.getOperand(0);
+ SDValue NewSext = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Sext), VT, AddOp0);
+ SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
+
+ // The wider add is guaranteed to not wrap because both operands are
+ // sign-extended.
+ SDNodeFlags Flags;
+ Flags.setNoSignedWrap(true);
+ return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
+}
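// Illustrative sketch, not part of this patch: the nsw identity this combine
// relies on, checked on plain 32-to-64-bit scalars (names are invented).
#include <cassert>
#include <cstdint>

int64_t sextOfAdd(int32_t X, int32_t C) {
  return static_cast<int64_t>(X + C);   // valid only because the add is "nsw"
}

int64_t addOfSext(int32_t X, int32_t C) {
  return static_cast<int64_t>(X) + static_cast<int64_t>(C);  // sext pulled ahead
}

int main() {
  assert(sextOfAdd(100, -42) == addOfSext(100, -42));
  assert(sextOfAdd(-7, 1000) == addOfSext(-7, 1000));
  return 0;
}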
+
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -24763,13 +27214,13 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
}
}
- if (!Subtarget->hasFp256())
- return SDValue();
-
- if (VT.isVector() && VT.getSizeInBits() == 256)
+ if (Subtarget->hasAVX() && VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
+ if (SDValue NewAdd = promoteSextBeforeAddNSW(N, DAG, Subtarget))
+ return NewAdd;
+
return SDValue();
}
@@ -24783,9 +27234,7 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
- (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
- !Subtarget->hasAVX512()))
+ if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA())
return SDValue();
SDValue A = N->getOperand(0);
@@ -24830,8 +27279,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
N0.getOperand(0).hasOneUse()) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!C || C->getZExtValue() != 1)
+ if (!isOneConstant(N0.getOperand(1)))
return SDValue();
return DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
@@ -24884,21 +27332,19 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
- if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
- LHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
- }
+ if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
+ LHS.getOperand(1));
+ return DAG.getSetCC(DL, N->getValueType(0), addV,
+ DAG.getConstant(0, DL, addV.getValueType()), CC);
+ }
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
- if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
- RHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
- }
+ if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) {
+ SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
+ RHS.getOperand(1));
+ return DAG.getSetCC(DL, N->getValueType(0), addV,
+ DAG.getConstant(0, DL, addV.getValueType()), CC);
+ }
if (VT.getScalarType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
@@ -24936,52 +27382,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
- SelectionDAG &DAG) {
- SDLoc dl(Load);
- MVT VT = Load->getSimpleValueType(0);
- MVT EVT = VT.getVectorElementType();
- SDValue Addr = Load->getOperand(1);
- SDValue NewAddr = DAG.getNode(
- ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
- DAG.getConstant(Index * EVT.getStoreSize(), dl,
- Addr.getSimpleValueType()));
-
- SDValue NewLoad =
- DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
- DAG.getMachineFunction().getMachineMemOperand(
- Load->getMemOperand(), 0, EVT.getStoreSize()));
- return NewLoad;
-}
-
-static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget *Subtarget) {
- SDLoc dl(N);
- MVT VT = N->getOperand(1)->getSimpleValueType(0);
- assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
- "X86insertps is only defined for v4x32");
-
- SDValue Ld = N->getOperand(1);
- if (MayFoldLoad(Ld)) {
- // Extract the countS bits from the immediate so we can get the proper
- // address when narrowing the vector load to a specific element.
- // When the second source op is a memory address, insertps doesn't use
- // countS and just gets an f32 from that address.
- unsigned DestIndex =
- cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
-
- Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
-
- // Create this as a scalar to vector to match the instruction pattern.
- SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
- // countS bits are ignored when loading from memory on insertps, which
- // means we don't need to explicitly set them to 0.
- return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
- LoadScalarToVector, N->getOperand(2));
- }
- return SDValue();
-}
-
static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
SDValue V0 = N->getOperand(0);
SDValue V1 = N->getOperand(1);
@@ -25008,6 +27408,20 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ // Gather and Scatter instructions use k-registers for masks. The type of
+ // the masks is v*i1. So the mask will be truncated anyway.
+  // The SIGN_EXTEND_INREG may be dropped.
+ SDValue Mask = N->getOperand(2);
+ if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+ NewOps[2] = Mask.getOperand(0);
+ DAG.UpdateNodeOperands(N, NewOps);
+ }
+ return SDValue();
+}
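// Illustrative sketch, not part of this patch: why the SIGN_EXTEND_INREG
// feeding a v*i1 gather/scatter mask is redundant. Only bit 0 of each mask
// element is consumed, and in-register sign extension never changes bit 0.
#include <cassert>
#include <cstdint>

static uint32_t signExtendInRegFrom8(uint32_t X) {   // models SIGN_EXTEND_INREG from i8
  return static_cast<uint32_t>(static_cast<int32_t>((X & 0xFF) ^ 0x80) - 0x80);
}

int main() {
  const uint32_t Vals[] = {0, 1, 0x7F, 0x80, 0xFF, 0x1234};
  for (uint32_t X : Vals)
    assert((signExtendInRegFrom8(X) & 1u) == (X & 1u));   // low bit preserved
  return 0;
}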
+
// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
@@ -25182,7 +27596,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
- if (Op0.getOpcode() == ISD::LOAD) {
+ if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
EVT LdVT = Ld->getValueType(0);
@@ -25357,15 +27771,14 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
}
// Check if we can bypass extracting and re-inserting an element of an input
- // vector. Essentialy:
+ // vector. Essentially:
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
SDValue ExtractedV = V.getOperand(0);
SDValue OrigV = ExtractedV.getOperand(0);
- if (auto *ExtractIdx = dyn_cast<ConstantSDNode>(ExtractedV.getOperand(1)))
- if (ExtractIdx->getZExtValue() == 0) {
+ if (isNullConstant(ExtractedV.getOperand(1))) {
MVT OrigVT = OrigV.getSimpleValueType();
// Extract a subvector if necessary...
if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
@@ -25394,7 +27807,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT:
case X86ISD::SHRUNKBLEND:
return PerformSELECTCombine(N, DAG, DCI, Subtarget);
- case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
+ case ISD::BITCAST: return PerformBITCASTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI, Subtarget);
case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
@@ -25414,12 +27827,17 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
+ case ISD::FNEG: return PerformFNEGCombine(N, DAG, Subtarget);
+ case ISD::TRUNCATE: return PerformTRUNCATECombine(N, DAG, Subtarget);
case X86ISD::FXOR:
- case X86ISD::FOR: return PerformFORCombine(N, DAG);
+ case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
- case X86ISD::FAND: return PerformFANDCombine(N, DAG);
- case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM: return performFMinNumFMaxNumCombine(N, DAG,
+ Subtarget);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget);
+ case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
@@ -25447,14 +27865,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
- case X86ISD::INSERTPS: {
- if (getTargetMachine().getOptLevel() > CodeGenOpt::None)
- return PerformINSERTPSCombine(N, DAG, Subtarget);
- break;
- }
case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG);
+ case ISD::MGATHER:
+ case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG);
}
return SDValue();
@@ -26084,6 +28497,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::f64:
case MVT::i64:
return std::make_pair(0U, &X86::FR64RegClass);
+ // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
@@ -26168,17 +28582,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass ||
Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) {
unsigned Size = VT.getSizeInBits();
- MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8
- : Size == 16 ? MVT::i16
- : Size == 32 ? MVT::i32
- : Size == 64 ? MVT::i64
- : MVT::Other;
- unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy);
+ if (Size == 1) Size = 8;
+ unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
if (DestReg > 0) {
Res.first = DestReg;
- Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass
- : SimpleTy == MVT::i16 ? &X86::GR16RegClass
- : SimpleTy == MVT::i32 ? &X86::GR32RegClass
+ Res.second = Size == 8 ? &X86::GR8RegClass
+ : Size == 16 ? &X86::GR16RegClass
+ : Size == 32 ? &X86::GR32RegClass
: &X86::GR64RegClass;
assert(Res.second->contains(Res.first) && "Register in register class");
} else {
@@ -26196,6 +28606,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.
+ // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
@@ -26244,6 +28655,15 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
-bool X86TargetLowering::isTargetFTOL() const {
- return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
+bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+ // Integer division on x86 is expensive. However, when aggressively optimizing
+ // for code size, we prefer to use a div instruction, as it is usually smaller
+ // than the alternative sequence.
+ // The exception to this is vector division. Since x86 doesn't have vector
+ // integer division, leaving the division as-is is a loss even in terms of
+ // size, because it will have to be scalarized, while the alternative code
+ // sequence can be performed in vector form.
+ bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::MinSize);
+ return OptSize && !VT.isVector();
}
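// Illustrative sketch, not part of this patch: the kind of multiply-by-
// reciprocal expansion a scalar udiv normally becomes, which is why a single
// DIV is preferred when minimizing code size (the constant shown applies to
// division by 10 only).
#include <cstdint>

uint32_t divBy10Expanded(uint32_t X) {
  // 0xCCCCCCCD == ceil(2^35 / 10); the 64-bit product cannot overflow.
  return static_cast<uint32_t>((static_cast<uint64_t>(X) * 0xCCCCCCCDull) >> 35);
}

uint32_t divBy10Plain(uint32_t X) {
  return X / 10;   // under minsize this stays a real DIV on x86
}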
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 723d530..a29dc9a 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -126,6 +126,9 @@ namespace llvm {
/// 1 is the number of bytes of stack to pop.
RET_FLAG,
+ /// Return from interrupt. Operand 0 is the number of bytes to pop.
+ IRET,
+
/// Repeat fill, corresponds to X86::REP_STOSx.
REP_STOS,
@@ -182,6 +185,8 @@ namespace llvm {
/// Compute Sum of Absolute Differences.
PSADBW,
+ /// Compute Double Block Packed Sum-Absolute-Differences
+ DBPSADBW,
/// Bitwise Logical AND NOT of Packed FP values.
ANDNP,
@@ -211,6 +216,8 @@ namespace llvm {
// FP vector get exponent
FGETEXP_RND,
+ // Extract Normalized Mantissas
+ VGETMANT,
// FP Scale
SCALEF,
// Integer add/sub with unsigned saturation.
@@ -236,6 +243,9 @@ namespace llvm {
// Integer absolute value
ABS,
+ // Detect Conflicts Within a Vector
+ CONFLICT,
+
/// Floating point max and min.
FMAX, FMIN,
@@ -282,9 +292,8 @@ namespace llvm {
// Vector integer truncate.
VTRUNC,
-
- // Vector integer truncate with mask.
- VTRUNCM,
+ // Vector integer truncate with unsigned/signed saturation.
+ VTRUNCUS, VTRUNCS,
// Vector FP extend.
VFPEXT,
@@ -295,6 +304,9 @@ namespace llvm {
// Vector signed/unsigned integer to double.
CVTDQ2PD, CVTUDQ2PD,
+      // Convert a vector to a mask: set bits based on the MSB.
+ CVT2MASK,
+
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -349,6 +361,7 @@ namespace llvm {
// OR/AND test for masks
KORTEST,
+ KTEST,
// Several flavors of instructions with vector shuffle behaviors.
PACKSS,
@@ -382,12 +395,24 @@ namespace llvm {
VPERMIV3,
VPERMI,
VPERM2X128,
- //Fix Up Special Packed Float32/64 values
+ // Bitwise ternary logic
+ VPTERNLOG,
+ // Fix Up Special Packed Float32/64 values
VFIXUPIMM,
- //Range Restriction Calculation For Packed Pairs of Float32/64 values
+ // Range Restriction Calculation For Packed Pairs of Float32/64 values
VRANGE,
+      // Reduce - Perform Reduction Transformation on scalar/packed FP
+ VREDUCE,
+ // RndScale - Round FP Values To Include A Given Number Of Fraction Bits
+ VRNDSCALE,
+      // VFPCLASS - Tests the class of packed FP values.
+ VFPCLASS,
+      // VFPCLASSS - Tests the class of a scalar FP value.
+ VFPCLASSS,
// Broadcast scalar to vector
VBROADCAST,
+ // Broadcast mask to vector
+ VBROADCASTM,
// Broadcast subvector to vector
SUBV_BROADCAST,
// Insert/Extract vector element
@@ -397,13 +422,21 @@ namespace llvm {
/// SSE4A Extraction and Insertion.
EXTRQI, INSERTQI,
+ // XOP variable/immediate rotations
+ VPROT, VPROTI,
+ // XOP arithmetic/logical shifts
+ VPSHA, VPSHL,
+ // XOP signed/unsigned integer comparisons
+ VPCOM, VPCOMU,
+
// Vector multiply packed unsigned doubleword integers
PMULUDQ,
// Vector multiply packed signed doubleword integers
PMULDQ,
// Vector Multiply Packed UnsignedIntegers with Round and Scale
MULHRS,
-
+ // Multiply and Add Packed Integers
+ VPMADDUBSW, VPMADDWD,
// FMA nodes
FMADD,
FNMADD,
@@ -418,7 +451,6 @@ namespace llvm {
FNMSUB_RND,
FMADDSUB_RND,
FMSUBADD_RND,
- RNDSCALE,
// Compress and expand
COMPRESS,
@@ -443,9 +475,6 @@ namespace llvm {
// falls back to heap allocation if not.
SEG_ALLOCA,
- // Windows's _ftol2 runtime routine to do fptoui.
- WIN_FTOL,
-
// Memory barrier
MEMBARRIER,
MFENCE,
@@ -580,15 +609,6 @@ namespace llvm {
bool isCalleePop(CallingConv::ID CallingConv,
bool is64Bit, bool IsVarArg, bool TailCallOpt);
- /// AVX512 static rounding constants. These need to match the values in
- /// avx512fintrin.h.
- enum STATIC_ROUNDING {
- TO_NEAREST_INT = 0,
- TO_NEG_INF = 1,
- TO_POS_INF = 2,
- TO_ZERO = 3,
- CUR_DIRECTION = 4
- };
}
//===--------------------------------------------------------------------===//
@@ -850,16 +870,7 @@ namespace llvm {
/// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
- }
-
- /// Return true if the target uses the MSVC _ftol2 routine for fptoui.
- bool isTargetFTOL() const;
-
- /// Return true if the MSVC _ftol2 routine should be used for fptoui to the
- /// given type.
- bool isIntegerTypeFTOL(EVT VT) const {
- return isTargetFTOL() && VT == MVT::i64;
+ (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
/// \brief Returns true if it is beneficial to convert a load of a constant
@@ -879,6 +890,16 @@ namespace llvm {
unsigned getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
/// This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -890,6 +911,11 @@ namespace llvm {
bool getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const override;
+    /// Return the location (as an IR pointer Value) at which the target
+    /// stores the SafeStack pointer when it is kept at a fixed offset in some
+    /// non-standard address space.
+ Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
+
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
@@ -899,6 +925,8 @@ namespace llvm {
/// \brief Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
+ bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -908,7 +936,6 @@ namespace llvm {
/// Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- const DataLayout *TD;
/// Select between SSE or x87 floating point ops.
/// When SSE is available, use it for f32 operations.
@@ -955,7 +982,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
- bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall, bool Is64Bit,
int FPDiff, SDLoc dl) const;
@@ -969,7 +995,6 @@ namespace llvm {
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
@@ -994,9 +1019,9 @@ namespace llvm {
SDValue LowerToBT(SDValue And, ISD::CondCode CC,
SDLoc dl, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
@@ -1042,27 +1067,16 @@ namespace llvm {
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
- bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
- TargetLoweringBase::AtomicRMWExpansionKind
+ TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
- bool needsCmpXchgNb(const Type *MemType) const;
-
- /// Utility function to emit atomic-load-arith operations (and, or, xor,
- /// nand, max, min, umax, umin). It takes the corresponding instruction to
- /// expand, the associated machine basic block, and the associated X86
- /// opcodes for reg/reg.
- MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
-
- /// Utility function to emit atomic-load-arith operations (and, or, xor,
- /// nand, add, sub, swap) for 64-bit operands on 32-bit target.
- MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ bool needsCmpXchgNb(Type *MemType) const;
// Utility function to emit the low-level va_arg code for X86-64.
MachineBasicBlock *EmitVAARG64WithCustomInserter(
@@ -1077,18 +1091,24 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredCatchPad(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;
- MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const;
@@ -1121,7 +1141,7 @@ namespace llvm {
unsigned &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
- bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
+ unsigned combineRepeatedFPDivisors() const override;
};
namespace X86 {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index faa9150..8bf2925 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -79,7 +79,7 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (TypeVariantName, "i"),
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512),
+ !if (!eq (Size, 512),
!if (!eq (EltSize, 64), "v8i64", "v16i32"),
VTName))), VTName));
@@ -145,6 +145,8 @@ def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// We map scalar types to the smallest (128-bit) vector type
// with the appropriate element type. This allows to use the same masking logic.
+def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
+def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
@@ -274,6 +276,22 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
+// Similar to AVX512_maskable_3src but in this case the input VT for the tied
+// operand differs from the output VT. This requires a bitconvert on
+// the preserved vector going into the vselect.
+multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
+ X86VectorVTInfo InVT,
+ dag Outs, dag NonTiedIns, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS> :
+ AVX512_maskable_common<O, F, OutVT, Outs,
+ !con((ins InVT.RC:$src1), NonTiedIns),
+ !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
+ !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
+ (vselect InVT.KRCWM:$mask, RHS,
+ (bitconvert InVT.RC:$src1))>;
+
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
@@ -471,84 +489,123 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
-
-multiclass vinsert_for_size_no_alt<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- PatFrag vinsert_insert,
- SDNodeXForm INSERT_get_vinsert_imm> {
+multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vinsert_insert> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
- def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, From.RC:$src2, u8imm:$src3),
- "vinsert" # From.EltTypeName # "x" # From.NumElts #
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}",
- [(set To.RC:$dst, (vinsert_insert:$src3 (To.VT VR512:$src1),
- (From.VT From.RC:$src2),
- (iPTR imm)))]>,
- EVEX_4V, EVEX_V512;
+ defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
+ (ins To.RC:$src1, From.RC:$src2, i32u8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (vinsert_insert:$src3 (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
- let mayLoad = 1 in
- def rm : AVX512AIi8<Opcode, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, From.MemOp:$src2, u8imm:$src3),
- "vinsert" # From.EltTypeName # "x" # From.NumElts #
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}",
- []>,
- EVEX_4V, EVEX_V512, EVEX_CD8<From.EltSize, From.CD8TupleForm>;
- }
-}
-
-multiclass vinsert_for_size<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
- PatFrag vinsert_insert,
- SDNodeXForm INSERT_get_vinsert_imm> :
- vinsert_for_size_no_alt<Opcode, From, To,
- vinsert_insert, INSERT_get_vinsert_imm> {
- // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for
- // vinserti32x4. Only add this if 64x2 and friends are not supported
- // natively via AVX512DQ.
- let Predicates = [NoDQI] in
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
+ (ins To.RC:$src1, From.MemOp:$src2, i32u8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (vinsert_insert:$src3 (To.VT To.RC:$src1),
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
+ EVEX_CD8<From.EltSize, From.CD8TupleForm>;
+ }
+}
+
+multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
+ X86VectorVTInfo To, PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
+ let Predicates = p in {
def : Pat<(vinsert_insert:$ins
- (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
- (AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
- VR512:$src1, From.RC:$src2,
- (INSERT_get_vinsert_imm VR512:$ins)))>;
+ (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rr")
+ To.RC:$src1, From.RC:$src2,
+ (INSERT_get_vinsert_imm To.RC:$ins)))>;
+
+ def : Pat<(vinsert_insert:$ins
+ (To.VT To.RC:$src1),
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rm")
+ To.RC:$src1, addr:$src2,
+ (INSERT_get_vinsert_imm To.RC:$ins)))>;
+ }
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256> {
- defm NAME # "32x4" : vinsert_for_size<Opcode128,
+
+ let Predicates = [HasVLX] in
+ defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ vinsert128_insert>, EVEX_V256;
+
+ defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo<16, EltVT32, VR512>,
- X86VectorVTInfo< 2, EltVT64, VR128X>,
+ vinsert128_insert>, EVEX_V512;
+
+ defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert128_insert,
- INSERT_get_vinsert128_imm>;
- let Predicates = [HasDQI] in
- defm NAME # "64x2" : vinsert_for_size_no_alt<Opcode128,
+ vinsert256_insert>, VEX_W, EVEX_V512;
+
+ let Predicates = [HasVLX, HasDQI] in
+ defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ vinsert128_insert>, VEX_W, EVEX_V256;
+
+ let Predicates = [HasDQI] in {
+ defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert128_insert,
- INSERT_get_vinsert128_imm>, VEX_W;
- defm NAME # "64x4" : vinsert_for_size<Opcode256,
- X86VectorVTInfo< 4, EltVT64, VR256X>,
- X86VectorVTInfo< 8, EltVT64, VR512>,
- X86VectorVTInfo< 8, EltVT32, VR256>,
- X86VectorVTInfo<16, EltVT32, VR512>,
- vinsert256_insert,
- INSERT_get_vinsert256_imm>, VEX_W;
- let Predicates = [HasDQI] in
- defm NAME # "32x8" : vinsert_for_size_no_alt<Opcode256,
- X86VectorVTInfo< 8, EltVT32, VR256X>,
- X86VectorVTInfo<16, EltVT32, VR512>,
- vinsert256_insert,
- INSERT_get_vinsert256_imm>;
+ vinsert128_insert>, VEX_W, EVEX_V512;
+
+ defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert>, EVEX_V512;
+ }
}
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
+// Codegen patterns with the alternative types.
+// Only add this if 64x2 and its friends are not supported natively via AVX512DQ.
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+
+defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+
+// Codegen pattern with the alternative types insert VEC128 into VEC256
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+// Codegen pattern with the alternative types insert VEC128 into VEC512
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+// Codegen pattern with the alternative types insert VEC256 into VEC512
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
@@ -566,90 +623,158 @@ def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
// AVX-512 VECTOR EXTRACT
//---
+multiclass vextract_for_size_first_position_lowering<X86VectorVTInfo From,
+ X86VectorVTInfo To> {
+ // A subvector extract from the first vector position is
+ // a subregister copy that needs no instruction.
+ def NAME # To.NumElts:
+ Pat<(To.VT (extract_subvector (From.VT From.RC:$src),(iPTR 0))),
+ (To.VT (EXTRACT_SUBREG (From.VT From.RC:$src), To.SubRegIdx))>;
+}
+
multiclass vextract_for_size<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
- PatFrag vextract_extract,
- SDNodeXForm EXTRACT_get_vextract_imm> {
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vextract_extract> :
+ vextract_for_size_first_position_lowering<From, To> {
+
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+    // Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
+    // vextract_extract); we are only interested in patterns without a mask,
+    // the intrinsic patterns are matched below.
defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
- (ins VR512:$src1, u8imm:$idx),
- "vextract" # To.EltTypeName # "x4",
+ (ins From.RC:$src1, i32u8imm:$idx),
+ "vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
- [(set To.RC:$dst, (vextract_extract:$idx (From.VT VR512:$src1),
+ [(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
(iPTR imm)))]>,
- AVX512AIi8Base, EVEX, EVEX_V512;
- let mayStore = 1 in
- def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
- (ins To.MemOp:$dst, VR512:$src1, u8imm:$src2),
- "vextract" # To.EltTypeName # "x4\t{$src2, $src1, $dst|"
- "$dst, $src1, $src2}",
- []>, EVEX, EVEX_V512, EVEX_CD8<To.EltSize, CD8VT4>;
- }
-
- // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for
- // vextracti32x4
- def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)),
- (AltTo.VT (!cast<Instruction>(NAME # To.EltSize # "x4rr")
- VR512:$src1,
- (EXTRACT_get_vextract_imm To.RC:$ext)))>;
-
- // A 128/256-bit subvector extract from the first 512-bit vector position is
- // a subregister copy that needs no instruction.
- def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))),
- (To.VT
- (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>;
-
- // And for the alternative types.
- def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
- (AltTo.VT
- (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
+ AVX512AIi8Base, EVEX;
+ let mayStore = 1 in {
+ def rm : AVX512AIi8<Opcode, MRMDestMem, (outs),
+ (ins To.MemOp:$dst, From.RC:$src1, i32u8imm:$src2),
+ "vextract" # To.EltTypeName # "x" # To.NumElts #
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, EVEX;
+
+ def rmk : AVX512AIi8<Opcode, MRMDestMem, (outs),
+ (ins To.MemOp:$dst, To.KRCWM:$mask,
+ From.RC:$src1, i32u8imm:$src2),
+ "vextract" # To.EltTypeName # "x" # To.NumElts #
+ "\t{$src2, $src1, $dst {${mask}}|"
+ "$dst {${mask}}, $src1, $src2}",
+ []>, EVEX_K, EVEX;
+ }//mayStore = 1
+ }
// Intrinsic call with masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x4_512")
- VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
- (!cast<Instruction>(NAME # To.EltSize # "x4rrk") To.RC:$src0,
- (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
- VR512:$src1, imm:$idx)>;
+ "x" # To.NumElts # "_" # From.Size)
+ From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rrk")
+ To.RC:$src0,
+ (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+ From.RC:$src1, imm:$idx)>;
// Intrinsic call with zero-masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x4_512")
- VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
- (!cast<Instruction>(NAME # To.EltSize # "x4rrkz")
- (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
- VR512:$src1, imm:$idx)>;
+ "x" # To.NumElts # "_" # From.Size)
+ From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rrkz")
+ (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+ From.RC:$src1, imm:$idx)>;
// Intrinsic call without masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x4_512")
- VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
- (!cast<Instruction>(NAME # To.EltSize # "x4rr")
- VR512:$src1, imm:$idx)>;
+ "x" # To.NumElts # "_" # From.Size)
+ From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rr")
+ From.RC:$src1, imm:$idx)>;
+}
+
+// Codegen pattern for the alternative types
+multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
+ X86VectorVTInfo To, PatFrag vextract_extract,
+ SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> :
+ vextract_for_size_first_position_lowering<From, To> {
+
+ let Predicates = p in
+ def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rr")
+ From.RC:$src1,
+ (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
-multiclass vextract_for_type<ValueType EltVT32, int Opcode32,
- ValueType EltVT64, int Opcode64> {
- defm NAME # "32x4" : vextract_for_size<Opcode32,
+multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
+ ValueType EltVT64, int Opcode256> {
+ defm NAME # "32x4Z" : vextract_for_size<Opcode128,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
+ vextract128_extract>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+ defm NAME # "64x4Z" : vextract_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT64, VR512>,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ vextract256_extract>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
+ let Predicates = [HasVLX] in
+ defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ vextract128_extract>,
+ EVEX_V256, EVEX_CD8<32, CD8VT4>;
+ let Predicates = [HasVLX, HasDQI] in
+ defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
- vextract128_extract,
- EXTRACT_get_vextract128_imm>;
- defm NAME # "64x4" : vextract_for_size<Opcode64,
+ vextract128_extract>,
+ VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
+ let Predicates = [HasDQI] in {
+ defm NAME # "64x2Z" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
- X86VectorVTInfo< 4, EltVT64, VR256X>,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ vextract128_extract>,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
+ defm NAME # "32x8Z" : vextract_for_size<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
- X86VectorVTInfo< 8, EltVT32, VR256>,
- vextract256_extract,
- EXTRACT_get_vextract256_imm>, VEX_W;
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ vextract256_extract>,
+ EVEX_V512, EVEX_CD8<32, CD8VT8>;
+ }
}
defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
+// extract_subvector codegen patterns with the alternative types.
+// Only add this if 64x2 and its friends are not supported natively via AVX512DQ.
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
+
+defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
+defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
+
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
+
+// Codegen pattern with the alternative types extract VEC128 from VEC512
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
+// Codegen pattern with the alternative types extract VEC256 from VEC512
+defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
+
// A 128-bit subvector insert to the first 512-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)),
@@ -677,6 +802,10 @@ def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+def : Pat<(insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0)),
+ (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+def : Pat<(insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0)),
+ (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
@@ -694,50 +823,49 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
-multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
- ValueType svt, X86VectorVTInfo _> {
- defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
- "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
- T8PD, EVEX;
- let mayLoad = 1 in {
- defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.ScalarMemOp:$src),
- "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
- (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
- T8PD, EVEX;
- }
+multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
+
+ defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
+ (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
+ T8PD, EVEX;
+ let mayLoad = 1 in
+ defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+ (DestInfo.VT (X86VBroadcast
+ (SrcInfo.ScalarLdFrag addr:$src)))>,
+ T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
}
-multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
- AVX512VLVectorVTInfo _> {
- defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+multiclass avx512_fp_broadcast_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
- EVEX_V256;
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ EVEX_V256;
}
}
let ExeDomain = SSEPackedSingle in {
- defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
- avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, "vbroadcastss",
+ avx512vl_f32_info>;
let Predicates = [HasVLX] in {
- defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
- v4f32, v4f32x_info>, EVEX_V128,
- EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSSZ128 : avx512_broadcast_rm<0x18, "vbroadcastss",
+ v4f32x_info, v4f32x_info>, EVEX_V128;
}
}
let ExeDomain = SSEPackedDouble in {
- defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
- avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, "vbroadcastsd",
+ avx512vl_f64_info>, VEX_W;
}
// avx512_broadcast_pat introduces patterns for broadcast with a scalar argument.
-// Later, we can canonize broadcast instructions before ISel phase and
+// Later, we can canonicalize broadcast instructions before the ISel phase and
// eliminate additional patterns on ISel.
// SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar
// representations of source
@@ -834,70 +962,50 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
(bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
(VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
-multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
- RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
- RegisterClass KRC> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
- def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
- VR128X:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} |${dst} {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
- VR128X:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
- def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
- x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}} , $src}"),
- []>, EVEX, EVEX_K;
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
- x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [(set DstRC:$dst, (OpVT (vselect KRC:$mask,
- (X86VBroadcast (ld_frag addr:$src)),
- (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ;
- }
-}
-
-defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
- loadi32, VR512, v16i32, v4i32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
- loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VT1>;
+// Provide pattern aliases for broadcasts from the same register class; they
+// automatically perform the sub-register extract.
+multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
+ def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
+ (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
+}
+
+multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in {
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
+ avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
+ EVEX_V512;
+ // Defined separately to avoid redefinition.
+ defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
+ EVEX_V128;
+ }
+}
+
+defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
+ avx512vl_i8_info, HasBWI>;
+defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
+ avx512vl_i16_info, HasBWI>;
+defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
+ avx512vl_i32_info, HasAVX512>;
+defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
+ avx512vl_i64_info, HasAVX512>, VEX_W;
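As an illustrative aside, a minimal C++ sketch of the element broadcast these VPBROADCAST* forms implement; it assumes AVX-512F and the standard _mm512_set1_epi32 intrinsic, which typically lowers to vpbroadcastd:

    #include <immintrin.h>

    // Every 32-bit lane of the 512-bit result receives the same scalar value.
    __m512i broadcast_dword(int x) {
      return _mm512_set1_epi32(x);
    }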
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Src.MemOp:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set _Dst.RC:$dst,
- (_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))))]>, EVEX;
- def rmk : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Dst.KRCWM:$mask,
- _Src.MemOp:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs _Dst.RC:$dst), (ins _Dst.KRCWM:$mask,
- _Src.MemOp:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- }
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
+ (_Dst.VT (X86SubVBroadcast
+ (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+ AVX5128IBase, EVEX;
}
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
@@ -944,10 +1052,45 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
-def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
- (VPBROADCASTDZrr VR128X:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
- (VPBROADCASTQZrr VR128X:$src)>;
+multiclass avx512_broadcast_32x2<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _Dst, X86VectorVTInfo _Src,
+ SDNode OpNode = X86SubVBroadcast> {
+
+ defm r : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src)))>,
+ T8PD, EVEX;
+ let mayLoad = 1 in
+ defm m : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+ (_Dst.VT (OpNode
+ (_Src.VT (scalar_to_vector(loadi64 addr:$src)))))>,
+ T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>;
+}
+
+multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _> {
+ let Predicates = [HasDQI] in
+ defm Z : avx512_broadcast_32x2<opc, OpcodeStr, _.info512, _.info128>,
+ EVEX_V512;
+ let Predicates = [HasDQI, HasVLX] in
+ defm Z256 : avx512_broadcast_32x2<opc, OpcodeStr, _.info256, _.info128>,
+ EVEX_V256;
+}
+
+multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _> :
+ avx512_common_broadcast_32x2<opc, OpcodeStr, _> {
+
+ let Predicates = [HasDQI, HasVLX] in
+ defm Z128 : avx512_broadcast_32x2<opc, OpcodeStr, _.info128, _.info128,
+ X86SubV32x2Broadcast>, EVEX_V128;
+}
+
+defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
+ avx512vl_i32_info>;
+defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
+ avx512vl_f32_info>;
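As an illustrative aside, a scalar C++ reference for the 32x2 broadcast the VBROADCASTI32X2/VBROADCASTF32X2 forms above select: the low two dwords of the source are repeated across the destination (the helper name is invented for the sketch):

    #include <array>
    #include <cstdint>

    // dst[2*i] = e0 and dst[2*i+1] = e1 for every 64-bit pair of the vector.
    std::array<uint32_t, 16> broadcast_i32x2_ref(uint32_t e0, uint32_t e1) {
      std::array<uint32_t, 16> dst;
      for (int i = 0; i < 16; i += 2) {
        dst[i] = e0;
        dst[i + 1] = e1;
      }
      return dst;
    }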
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
@@ -959,21 +1102,6 @@ def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
-def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
- (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))),
- (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>;
-
-def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
- (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))),
- (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>;
-
-def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
- (VBROADCASTSSZr VR128X:$src)>;
-def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
- (VBROADCASTSDZr VR128X:$src)>;
-
// Provide a fallback in case the load node used in the patterns above
// has additional users, which prevents those patterns from being selected.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
@@ -985,170 +1113,178 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)),
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
-
-multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
- RegisterClass KRC> {
-let Predicates = [HasCDI] in
-def Zrr : AVX512XS8I<opc, MRMSrcReg, (outs VR512:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX, EVEX_V512;
-
-let Predicates = [HasCDI, HasVLX] in {
-def Z128rr : AVX512XS8I<opc, MRMSrcReg, (outs VR128:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX, EVEX_V128;
-def Z256rr : AVX512XS8I<opc, MRMSrcReg, (outs VR256:$dst), (ins KRC:$src),
+multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, RegisterClass KRC> {
+ def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX, EVEX_V256;
+ [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
}
+
+multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
+ let Predicates = [HasCDI] in
+ defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
+ let Predicates = [HasCDI, HasVLX] in {
+ defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
+ defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
+ }
}
-let Predicates = [HasCDI] in {
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
- VK16>;
+ avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
- VK8>, VEX_W;
-}
+ avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
-// AVX-512 - VPERM
-//
-// -- immediate form --
-multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain in {
- def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
- EVEX;
- def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst),
- (ins _.MemOp:$src1, u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (OpNode (_.LdFrag addr:$src1),
- (i8 imm:$src2))))]>,
- EVEX, EVEX_CD8<_.EltSize, CD8VF>;
+// -- VPERMI2 - 3 source operands form --
+multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
+let Constraints = "$src1 = $dst" in {
+ defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ AVX5128IBase;
+
+ let mayLoad = 1 in
+ defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
+ (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ EVEX_4V, AVX5128IBase;
+ }
}
+multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
+ let mayLoad = 1, Constraints = "$src1 = $dst" in
+ defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (_.VT (X86VPermi2X IdxVT.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
+ AVX5128IBase, EVEX_4V, EVEX_B;
}
-multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _,
- X86VectorVTInfo Ctrl> :
- avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> {
- let ExeDomain = _.ExeDomain in {
- def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2),
- !strconcat("vpermil" # _.Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (X86VPermilpv _.RC:$src1,
- (Ctrl.VT Ctrl.RC:$src2))))]>,
- EVEX_4V;
- def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst),
- (ins _.RC:$src1, Ctrl.MemOp:$src2),
- !strconcat("vpermil" # _.Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst,
- (_.VT (X86VPermilpv _.RC:$src1,
- (Ctrl.VT (Ctrl.LdFrag addr:$src2)))))]>,
- EVEX_4V;
- }
-}
-defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>,
- EVEX_V512;
-defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>,
- EVEX_V512, VEX_W;
-
-def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
- (VPERMILPSZri VR512:$src1, imm:$imm)>;
-def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),
- (VPERMILPDZri VR512:$src1, imm:$imm)>;
-
-// -- VPERM2I - 3 source operands form --
-multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86VectorVTInfo _> {
+multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo ShuffleMask> {
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>, EVEX_V256;
+ }
+}
+
+multiclass avx512_perm_i_sizes_w<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo Idx> {
+ let Predicates = [HasBWI] in
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
+ Idx.info512>, EVEX_V512;
+ let Predicates = [HasBWI, HasVLX] in {
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
+ Idx.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
+ Idx.info256>, EVEX_V256;
+ }
+}
+
+defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
+ avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
+ avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w",
+ avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
+ avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
+ avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+
+// VPERMT2
+multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let Constraints = "$src1 = $dst" in {
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.RC:$src3),
+ (ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3))>, EVEX_4V,
AVX5128IBase;
let mayLoad = 1 in
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.MemOp:$src3),
+ (ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2,
- (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
+ (bitconvert (_.LdFrag addr:$src3))))>,
EVEX_4V, AVX5128IBase;
}
}
-multiclass avx512_perm_3src_mb<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86VectorVTInfo _> {
+multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
let mayLoad = 1, Constraints = "$src1 = $dst" in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
- (_.VT (OpNode _.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
+ (_.VT (X86VPermt2 _.RC:$src1,
+ IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
AVX5128IBase, EVEX_4V, EVEX_B;
}
-multiclass avx512_perm_3src_sizes<bits<8> opc, string OpcodeStr,
- SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
- let Predicates = [HasAVX512] in
- defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo ShuffleMask> {
+ defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>,
+ avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info512,
+ ShuffleMask.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
- EVEX_V128;
- defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
- EVEX_V256;
+ defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>,
+ avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info128,
+ ShuffleMask.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>,
+ avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info256,
+ ShuffleMask.info256>, EVEX_V256;
}
}
-multiclass avx512_perm_3src_sizes_w<bits<8> opc, string OpcodeStr,
- SDNode OpNode, AVX512VLVectorVTInfo VTInfo> {
+
+multiclass avx512_perm_t_sizes_w<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo VTInfo,
+ AVX512VLVectorVTInfo Idx> {
let Predicates = [HasBWI] in
- defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>,
- EVEX_V512;
+ defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
+ Idx.info512>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
- defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>,
- EVEX_V128;
- defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,
- avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>,
- EVEX_V256;
- }
-}
-defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3,
- avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3,
- avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3,
- avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3,
- avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3,
- avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3,
- avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3,
- avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
-defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3,
- avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3,
- avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
-defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3,
- avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+ defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
+ Idx.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
+ Idx.info256>, EVEX_V256;
+ }
+}
+
+defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d",
+ avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q",
+ avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMT2W : avx512_perm_t_sizes_w<0x7D, "vpermt2w",
+ avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps",
+ avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
+ avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
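As an illustrative aside, a scalar C++ reference for the two-source permute that VPERMT2D performs (VPERMI2D differs only in which operand is overwritten: the index vector rather than the first table); 32-bit elements at 512-bit vector length are assumed:

    #include <array>
    #include <cstdint>

    // Each result lane selects one of the 32 dwords formed by {tblA, tblB}.
    std::array<uint32_t, 16> vpermt2d_ref(const std::array<uint32_t, 16> &tblA,
                                          const std::array<uint32_t, 16> &idx,
                                          const std::array<uint32_t, 16> &tblB) {
      std::array<uint32_t, 16> dst;
      for (int i = 0; i < 16; ++i) {
        uint32_t sel = idx[i] & 0x1F;   // five control bits; bit 4 picks the table
        dst[i] = (sel < 16) ? tblA[sel] : tblB[sel - 16];
      }
      return dst;
    }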
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
@@ -1265,41 +1401,85 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
//===----------------------------------------------------------------------===//
// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
-multiclass avx512_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- SDNode OpNode, ValueType VT,
- PatFrag ld_frag, string Suffix> {
- def rr : AVX512Ii8<0xC2, MRMSrcReg,
- (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", Suffix,
+
+multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{
+
+ defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc)>, EVEX_4V;
+ let mayLoad = 1 in
+ defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+ defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
+ "vcmp${cc}"#_.Suffix,
+ "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ imm:$cc,
+ (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1, hasSideEffects = 0 in {
+ defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs VK1:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
+ defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc, $src2, $src1", "$src1, $src2, $cc">,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+
+ defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
+ (outs _.KRC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
+ "vcmp"#_.Suffix,
+ "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">,
+ EVEX_4V, EVEX_B;
+ }// let isAsmParserOnly = 1, hasSideEffects = 0
+
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512Ii8<0xC2, MRMSrcReg,
+ (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
+ !strconcat("vcmp${cc}", _.Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+ _.FRC:$src2,
+ imm:$cc))],
IIC_SSE_ALU_F32S_RR>, EVEX_4V;
- def rm : AVX512Ii8<0xC2, MRMSrcMem,
- (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc),
- !strconcat("vcmp${cc}", Suffix,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VK1:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
- let isAsmParserOnly = 1, hasSideEffects = 0 in {
- def rri_alt : AVX512Ii8<0xC2, MRMSrcReg,
- (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- !strconcat("vcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32S_RR>, EVEX_4V;
let mayLoad = 1 in
- def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem,
- (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- !strconcat("vcmp", Suffix,
- "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
- [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rm : AVX512Ii8<0xC2, MRMSrcMem,
+ (outs _.KRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+ !strconcat("vcmp${cc}", _.Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.KRC:$dst, (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src2),
+ imm:$cc))],
+ IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
}
}
let Predicates = [HasAVX512] in {
-defm VCMPSSZ : avx512_cmp_scalar<FR32X, f32mem, X86cmpms, f32, loadf32, "ss">,
- XS;
-defm VCMPSDZ : avx512_cmp_scalar<FR64X, f64mem, X86cmpms, f64, loadf64, "sd">,
- XD, VEX_W;
+ defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
+ AVX512XSIi8Base;
+ defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
+ AVX512XDIi8Base, VEX_W;
}
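As an illustrative aside, a typical C++ entry point for the mask-producing scalar compare defined above; it assumes AVX-512F and a compile-time predicate:

    #include <immintrin.h>

    // Compares only element 0 of each operand; the result lands in bit 0 of
    // the returned mask and the upper elements are ignored.
    __mmask8 less_than_scalar(__m128 a, __m128 b) {
      return _mm_cmp_ss_mask(a, b, _CMP_LT_OS);
    }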
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -1700,6 +1880,128 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
+// ----------------------------------------------------------------
+// FPClass
+// Handle the fpclass instruction: mask = op(reg_scalar, imm)
+//                                        op(mem_scalar, imm)
+multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in {
+ def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#
+ "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ let mayLoad = 1, AddedComplexity = 20 in {
+ def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ [(set _.KRC:$dst,
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ }
+ }
+}
+
+// Handle the fpclass instruction: mask = fpclass(reg_vec, reg_vec, imm)
+//                                        fpclass(reg_vec, mem_vec, imm)
+//                                        fpclass(reg_vec, broadcast(eltVt), imm)
+multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, string mem, string broadcast>{
+ def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix#
+ "\t{$src2, $src1, $dst {${mask}}| $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ let mayLoad = 1 in {
+ def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##mem#
+ "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ [(set _.KRC:$dst,(OpNode
+ (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2)))], NoItinerary>;
+ def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##mem#
+ "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
+ (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
+ def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+ _.BroadcastStr##", $dst | $dst, ${src1}"
+ ##_.BroadcastStr##", $src2}",
+ [(set _.KRC:$dst,(OpNode
+ (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src1))),
+ (i32 imm:$src2)))], NoItinerary>,EVEX_B;
+ def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
+ _.BroadcastStr##", $dst {${mask}} | $dst {${mask}}, ${src1}"##
+ _.BroadcastStr##", $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
+ (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>,
+ EVEX_B, EVEX_K;
+ }
+}
+
+multiclass avx512_vector_fpclass_all<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd,
+ string broadcast>{
+ let Predicates = [prd] in {
+ defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info512, "{z}",
+ broadcast>, EVEX_V512;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info128, "{x}",
+ broadcast>, EVEX_V128;
+ defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info256, "{y}",
+ broadcast>, EVEX_V256;
+ }
+}
+
+multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
+ bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
+ defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
+ VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
+ defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
+ VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
+ defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
+ f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
+ defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
+ f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
+}
+
+defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
+ X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
+
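As an illustrative aside, how the packed VFPCLASS form is commonly reached from C++, assuming AVX-512DQ and the standard _mm512_fpclass_ps_mask intrinsic (the immediate selects the categories to test):

    #include <immintrin.h>

    // 0x01 = QNaN, 0x08 = +Inf, 0x10 = -Inf, 0x80 = SNaN; a set mask bit means
    // the corresponding lane falls into one of the requested classes.
    __mmask16 lanes_nan_or_inf(__m512 v) {
      return _mm512_fpclass_ps_mask(v, 0x01 | 0x08 | 0x10 | 0x80);
    }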
//-----------------------------------------------------------------
// Mask register copy, including
// - copy between mask registers
@@ -1786,6 +2088,11 @@ let Predicates = [HasDQI] in {
(KMOVBmk addr:$dst, VK8:$src)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(KMOVBkm addr:$src)>;
+
+ def : Pat<(store VK4:$src, addr:$dst),
+ (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
+ def : Pat<(store VK2:$src, addr:$dst),
+ (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
@@ -1837,10 +2144,15 @@ let Predicates = [HasAVX512] in {
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
def : Pat<(i32 (anyext VK1:$src)),
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
+
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
(AND32ri (KMOVWrk
(COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+ def : Pat<(i8 (anyext VK1:$src)),
+ (EXTRACT_SUBREG
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
+
def : Pat<(i64 (zext VK1:$src)),
(AND64ri8 (SUBREG_TO_REG (i64 0),
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
@@ -1848,17 +2160,19 @@ let Predicates = [HasAVX512] in {
(EXTRACT_SUBREG
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
sub_16bit)>;
- def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK16)>;
- def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK8)>;
-}
-let Predicates = [HasBWI] in {
- def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK32)>;
- def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
- (COPY_TO_REGCLASS VK1:$src, VK64)>;
}
+def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK16)>;
+def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK8)>;
+def : Pat<(v4i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK4)>;
+def : Pat<(v2i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK2)>;
+def : Pat<(v32i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK32)>;
+def : Pat<(v64i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK64)>;
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
@@ -1955,11 +2269,12 @@ multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode, bit IsCommutable> {
+ SDPatternOperator OpNode, bit IsCommutable,
+ Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- HasAVX512, IsCommutable>, VEX_4V, VEX_L, PS;
+ prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
@@ -1974,6 +2289,7 @@ defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
+defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
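As an illustrative aside, kadd simply adds the two mask registers as unsigned integers; a scalar C++ reference for the 16-bit form, with any carry out of bit 15 discarded:

    #include <cstdint>

    uint16_t kaddw_ref(uint16_t a, uint16_t b) {
      return static_cast<uint16_t>(a + b);   // wraps on overflow, like the instruction
    }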
multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -2047,59 +2363,48 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Mask unpacking
-multiclass avx512_mask_unpck<bits<8> opc, string OpcodeStr,
- RegisterClass KRC> {
- let Predicates = [HasAVX512] in
- def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
-}
+multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
+ RegisterClass KRCSrc, Predicate prd> {
+ let Predicates = [prd] in {
+ def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
+ (ins KRC:$src1, KRC:$src2),
+ "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V, VEX_L;
-multiclass avx512_mask_unpck_bw<bits<8> opc, string OpcodeStr> {
- defm BW : avx512_mask_unpck<opc, !strconcat(OpcodeStr, "bw"), VK16>,
- VEX_4V, VEX_L, PD;
+ def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
+ (!cast<Instruction>(NAME##rr)
+ (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
+ (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
+ }
}
-defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">;
-def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))),
- (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16),
- (COPY_TO_REGCLASS VK8:$src1, VK16))>;
-
-
-multiclass avx512_mask_unpck_int<string IntName, string InstName> {
- let Predicates = [HasAVX512] in
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_bw")
- (i16 GR16:$src1), (i16 GR16:$src2)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"BWrr")
- (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
- (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
-}
-defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">;
+defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
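As an illustrative aside, a scalar C++ reference for the concat_vectors lowering above: kunpckbw places the first v8i1 operand of the concatenation in the low byte of the result and the second in the high byte:

    #include <cstdint>

    // concat_vectors(lo, hi): lo ends up in bits [7:0], hi in bits [15:8].
    uint16_t kunpckbw_ref(uint8_t lo, uint8_t hi) {
      return static_cast<uint16_t>(lo) | (static_cast<uint16_t>(hi) << 8);
    }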
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- SDNode OpNode> {
- let Predicates = [HasAVX512], Defs = [EFLAGS] in
+ SDNode OpNode, Predicate prd> {
+ let Predicates = [prd], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
}
-multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm W : avx512_mask_testop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, PS;
- let Predicates = [HasDQI] in
- defm B : avx512_mask_testop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
- VEX, PD;
- let Predicates = [HasBWI] in {
- defm Q : avx512_mask_testop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
- VEX, PS, VEX_W;
- defm D : avx512_mask_testop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
- VEX, PD, VEX_W;
- }
+multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ Predicate prdW = HasAVX512> {
+ defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
+ VEX, PD;
+ defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
+ VEX, PS;
+ defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
+ VEX, PS, VEX_W;
+ defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
+ VEX, PD, VEX_W;
}
defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
+defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
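As an illustrative aside, kortest is usually reached through the _mm512_kortestz intrinsic (AVX-512F), which reports whether the OR of two 16-bit masks is all zeros:

    #include <immintrin.h>

    // Returns 1 when (a | b) == 0, i.e. when the instruction would set ZF.
    int masks_all_clear(__mmask16 a, __mmask16 b) {
      return _mm512_kortestz(a, b);
    }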
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -2124,7 +2429,7 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
let Predicates = [HasDQI] in
defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
VEX, TAPD;
- }
+ }
}
defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
@@ -2167,24 +2472,52 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
(v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
+def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))),
+ (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>;
+
+def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
+ (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;
+
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))),
(v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>;
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
(v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
-let Predicates = [HasVLX] in {
- def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
- (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
- def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
- (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
- def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
- (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;
- def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
- (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
- def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
- (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
-}
+def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>;
+
+def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))),
+ (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>;
+
+def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>;
+
+def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>;
+def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))),
+ (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>;
+
+def : Pat<(v32i1 (insert_subvector undef, VK2:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK2:$src, VK32))>;
+def : Pat<(v32i1 (insert_subvector undef, VK4:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK4:$src, VK32))>;
+def : Pat<(v32i1 (insert_subvector undef, VK8:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK8:$src, VK32))>;
+def : Pat<(v32i1 (insert_subvector undef, VK16:$src, (iPTR 0))),
+ (v32i1 (COPY_TO_REGCLASS VK16:$src, VK32))>;
+
+def : Pat<(v64i1 (insert_subvector undef, VK2:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK2:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK4:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK4:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK8:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK8:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK16:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK16:$src, VK64))>;
+def : Pat<(v64i1 (insert_subvector undef, VK32:$src, (iPTR 0))),
+ (v64i1 (COPY_TO_REGCLASS VK32:$src, VK64))>;
+
def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
(v8i1 (COPY_TO_REGCLASS
@@ -2304,23 +2637,21 @@ multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag st_frag, PatFrag mstore> {
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
- def rr_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
- OpcodeStr # "\t{$src, $dst|$dst, $src}", [],
- _.ExeDomain>, EVEX;
- let Constraints = "$src1 = $dst" in
- def rrk_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
- (ins _.RC:$src1, _.KRCWM:$mask, _.RC:$src2),
- OpcodeStr #
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}",
+
+ def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
+ OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
+ [], _.ExeDomain>, EVEX;
+ def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src),
+ OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
+ "${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K;
- def rrkz_alt : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
+ def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
- OpcodeStr #
- "\t{$src, ${dst} {${mask}} {z}|" #
+ OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
[], _.ExeDomain>, EVEX, EVEX_KZ;
- }
+
let mayStore = 1 in {
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -2425,22 +2756,6 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
-let Predicates = [HasAVX512, NoVLX] in {
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
- (VMOVUPSZmrk addr:$ptr,
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
- (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
-
-def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-
-def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))),
- (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk
- (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm),
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-}
-
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512>,
avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
@@ -2502,17 +2817,6 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(v16i32 VR512:$src))),
(VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
-// NoVLX patterns
-let Predicates = [HasAVX512, NoVLX] in {
-def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
- (VMOVDQU32Zmrk addr:$ptr,
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
- (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
-
-def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
- (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz
- (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
-}
// Move Int Doubleword to Packed Double Int
//
@@ -2520,32 +2824,37 @@ def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
- EVEX, VEX_LIG;
+ EVEX;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector GR64:$src)))],
- IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
+ IIC_SSE_MOVDQ>, EVEX, VEX_W;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>,
+ EVEX, VEX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in {
-def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
+def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))],
+ [(set FR64X:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
+def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))],
+ [(set GR64:$dst, (bitconvert FR64X:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
-}
-def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
+def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
EVEX_CD8<64, CD8VT1>;
+}
// Move Int Doubleword to Single Scalar
//
@@ -2553,27 +2862,27 @@ let isCodeGenOnly = 1 in {
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert GR32:$src))],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
+ IIC_SSE_MOVDQ>, EVEX;
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
}
// Move doubleword from xmm register to r/m32
//
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
+ [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
- EVEX, VEX_LIG;
+ EVEX;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
- [(store (i32 (vector_extract (v4i32 VR128X:$src),
+ [(store (i32 (extractelt (v4i32 VR128X:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ EVEX, EVEX_CD8<32, CD8VT1>;
// Move quadword from xmm1 register to r/m64
//
@@ -2581,16 +2890,28 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))],
- IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W,
+ IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
-def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128X:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
- addr:$dst)], IIC_SSE_MOVDQ>,
- EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>,
- Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
+ Requires<[HasAVX512, In64BitMode]>;
+
+def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128X:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
+ addr:$dst)], IIC_SSE_MOVDQ>,
+ EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
+ Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
+
+let hasSideEffects = 0 in
+def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
+ EVEX, VEX_W;
// Move Scalar Single to Double Int
//
@@ -2599,92 +2920,95 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
(ins FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))],
- IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
+ IIC_SSE_MOVD_ToGP>, EVEX;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, FR32X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
}
// Move Quadword Int to Packed Quadword Int
//
-def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
- EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
//===----------------------------------------------------------------------===//
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar <string asm, RegisterClass RC,
- SDNode OpNode, ValueType vt,
- X86MemOperand x86memop, PatFrag mem_pat> {
- let hasSideEffects = 0 in {
- def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst, (vt (OpNode VR128X:$src1,
- (scalar_to_vector RC:$src2))))],
- IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
- let Constraints = "$src1 = $dst" in
- def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
- !strconcat(asm,
- "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
- [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
- def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
- EVEX, VEX_LIG;
+multiclass avx512_move_scalar <string asm, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2),
+ asm, "$src2, $src1","$src1, $src2",
+ (_.VT (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2))),
+ IIC_SSE_MOV_S_RR>, EVEX_4V;
+ let Constraints = "$src1 = $dst" , mayLoad = 1 in
+ defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
+ (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src),
+ asm,"$src","$src",
+ (_.VT (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src)))))>, EVEX;
+ let isCodeGenOnly = 1 in {
+ def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.FRC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))))],
+ _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
+ let mayLoad = 1 in
+ def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+ _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+ }
let mayStore = 1 in {
- def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
- EVEX, VEX_LIG;
- def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
- !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
- [], IIC_SSE_MOV_S_MR>,
- EVEX, VEX_LIG, EVEX_K;
+ def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
+ EVEX;
+ def mrk: AVX512PI<0x11, MRMDestMem, (outs),
+ (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
+ !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
+ [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
} // mayStore
- } //hasSideEffects = 0
}
-let ExeDomain = SSEPackedSingle in
-defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
- loadf32>, XS, EVEX_CD8<32, CD8VT1>;
+defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
+ VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
-let ExeDomain = SSEPackedDouble in
-defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
- loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
+ VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
- (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
- VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+ (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+ VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
- (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
- VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+ (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+ VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
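As an illustrative aside, the scalar semantics the X86select patterns above encode, written out in plain C++ (bit 0 of the mask chooses between the two scalars):

    // Mask bit set selects the first source, clear selects the second.
    float select_f32(bool mask_bit0, float src1, float src2) {
      return mask_bit0 ? src1 : src2;
    }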
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
(VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
-// For the disassembler
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
- def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
- (ins VR128X:$src1, FR32X:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XS, EVEX_4V, VEX_LIG;
- def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst),
- (ins VR128X:$src1, FR64X:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
- IIC_SSE_MOV_S_RR>,
- XD, EVEX_4V, VEX_LIG, VEX_W;
-}
+defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+ "vmovss.s", "$src2, $src1", "$src1, $src2", []>,
+ XS, EVEX_4V, VEX_LIG;
+
+defm VMOVSSDrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
+ (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+ "vmovsd.s", "$src2, $src1", "$src1, $src2", []>,
+ XD, EVEX_4V, VEX_LIG, VEX_W;
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
@@ -2768,10 +3092,10 @@ let Predicates = [HasAVX512] in {
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))),
+ def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
addr:$dst),
(VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
- def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))),
+ def : Pat<(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
addr:$dst),
(VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>;
@@ -2835,7 +3159,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(v2i64 VR128X:$src))))],
IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
-let AddedComplexity = 20 in
+let AddedComplexity = 20 , isCodeGenOnly = 1 in
def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -2964,7 +3288,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, OpndItins itins,
bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
itins.rr, IsCommutable>,
@@ -2972,7 +3296,7 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let mayLoad = 1 in
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))),
@@ -2986,7 +3310,7 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
let mayLoad = 1 in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
@@ -3058,20 +3382,20 @@ multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
- defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr, OpNode, itins, prd,
+ defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
IsCommutable>;
- defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr, OpNode, itins, prd,
+ defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
IsCommutable>;
}
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
- defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr, OpNode, itins, prd,
+ defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
IsCommutable>;
- defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr, OpNode, itins, prd,
+ defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
IsCommutable>;
}
@@ -3086,15 +3410,15 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
}
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
- SDNode OpNode,X86VectorVTInfo _Src,
+ SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst, bit IsCommutable = 0> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
- "$src2, $src1","$src1, $src2",
- (_Dst.VT (OpNode
- (_Src.VT _Src.RC:$src1),
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2))),
- itins.rr, IsCommutable>,
+ itins.rr, IsCommutable>,
AVX512BIBase, EVEX_4V;
let mayLoad = 1 in {
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
@@ -3106,12 +3430,12 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
AVX512BIBase, EVEX_4V;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
- (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
+ (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Dst.BroadcastStr##", $src1",
"$src1, ${src2}"##_Dst.BroadcastStr,
- (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
- (_Dst.VT (X86VBroadcast
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
+ (_Dst.VT (X86VBroadcast
(_Dst.ScalarLdFrag addr:$src2)))))),
itins.rm>,
AVX512BIBase, EVEX_4V, EVEX_B;
@@ -3127,24 +3451,24 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
- SSE_INTALU_ITINS_P, HasBWI, 1>;
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
- SSE_INTALU_ITINS_P, HasBWI, 0>;
-defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul,
- SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
- SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
- SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P,
- HasBWI, 1>;
-defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P,
+ SSE_INTALU_ITINS_P, HasBWI, 0>;
+defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
+ SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
+ SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
+defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
HasBWI, 1>;
-defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P,
- HasBWI, 1>, T8PD;
+defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>;
+defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
- SSE_INTALU_ITINS_P, HasBWI, 1>;
-
+ SSE_INTALU_ITINS_P, HasBWI, 1>;
+
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
SDNode OpNode, bit IsCommutable = 0> {
@@ -3159,7 +3483,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
v4i32x_info, v2i64x_info, IsCommutable>,
EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
}
-}
+}
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
X86pmuldq, 1>,T8PD;
@@ -3170,25 +3494,25 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
let mayLoad = 1 in {
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
- (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
+ (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Src.BroadcastStr##", $src1",
"$src1, ${src2}"##_Src.BroadcastStr,
- (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
- (_Src.VT (X86VBroadcast
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
+ (_Src.VT (X86VBroadcast
(_Src.ScalarLdFrag addr:$src2))))))>,
EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
}
}
-multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,X86VectorVTInfo _Src,
+multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode,X86VectorVTInfo _Src,
X86VectorVTInfo _Dst> {
- defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
- "$src2, $src1","$src1, $src2",
- (_Dst.VT (OpNode
- (_Src.VT _Src.RC:$src1),
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
(_Src.VT _Src.RC:$src2)))>,
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
let mayLoad = 1 in {
@@ -3229,102 +3553,59 @@ multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
v16i8x_info>, EVEX_V128;
}
}
+
+multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, AVX512VLVectorVTInfo _Src,
+ AVX512VLVectorVTInfo _Dst> {
+ defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
+ _Dst.info512>, EVEX_V512;
+ let Predicates = [HasVLX] in {
+ defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
+ _Dst.info256>, EVEX_V256;
+ defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
+ _Dst.info128>, EVEX_V128;
+ }
+}
+
let Predicates = [HasBWI] in {
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
+
+ defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
+ avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
+ defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
+ avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase;
}
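The new avx512_vpmadd multiclass gives VPMADDUBSW/VPMADDWD their 512-bit (and, under VLX, 128/256-bit) EVEX forms. A short C sketch of the 512-bit variants, assuming a compiler with AVX-512BW enabled; the intrinsic-to-instruction mapping in the comments is the usual one, not asserted by this patch:

    #include <immintrin.h>

    /* vpmaddwd: multiply signed 16-bit pairs, add adjacent products -> 32-bit. */
    __m512i madd_words(__m512i a, __m512i b) {
        return _mm512_madd_epi16(a, b);
    }

    /* vpmaddubsw: multiply unsigned by signed bytes, saturate-add pairs -> 16-bit. */
    __m512i madd_bytes(__m512i a, __m512i b) {
        return _mm512_maddubs_epi16(a, b);
    }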
-defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax,
+defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax,
+defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax,
+defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax,
+defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin,
+defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin,
+defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin,
+defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin,
+defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 - Unpack Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_unpack_fp<bits<8> opc, SDNode OpNode, ValueType vt,
- PatFrag mem_frag, RegisterClass RC,
- X86MemOperand x86memop, string asm,
- Domain d> {
- def rr : AVX512PI<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2),
- asm, [(set RC:$dst,
- (vt (OpNode RC:$src1, RC:$src2)))],
- d>, EVEX_4V;
- def rm : AVX512PI<opc, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- asm, [(set RC:$dst,
- (vt (OpNode RC:$src1,
- (bitconvert (mem_frag addr:$src2)))))],
- d>, EVEX_4V;
-}
-
-defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64,
- VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64,
- VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64,
- VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64,
- VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-multiclass avx512_unpack_int<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop> {
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
- IIC_SSE_UNPCK>, EVEX_4V;
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1),
- (bitconvert (memop_frag addr:$src2)))))],
- IIC_SSE_UNPCK>, EVEX_4V;
-}
-defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32,
- VR512, loadv16i32, i512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64,
- VR512, loadv8i64, i512mem>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32,
- VR512, loadv16i32, i512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64,
- VR512, loadv8i64, i512mem>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
@@ -3362,12 +3643,12 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
let isCodeGenOnly = 1, isCommutable = IsCommutable,
Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
- (ins _.FRC:$src1, _.FRC:$src2),
+ (ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
itins.rr>;
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
- (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rr>;
@@ -3375,7 +3656,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode VecNode, OpndItins itins, bit IsCommutable> {
+ SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
@@ -3470,7 +3751,7 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
EVEX_4V, EVEX_B;
}
-multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
IsCommutable>, EVEX_V512, PS,
@@ -3514,7 +3795,7 @@ defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
@@ -3550,13 +3831,34 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}//let mayLoad = 1
}
-multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
+multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>;
+ }//let mayLoad = 1
+}
+
+multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> {
+ defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
+ defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f32x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNode, SSE_ALU_ITINS_S.s>,
+ EVEX_4V,EVEX_CD8<32, CD8VT1>;
+ defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f64x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNode, SSE_ALU_ITINS_S.d>,
+ EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
@@ -3569,7 +3871,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
}
}
-defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD;
+defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD;
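VSCALEF now also covers the scalar forms (opcode 0x2D) alongside the packed ones. A hedged C sketch using the standard AVX-512F intrinsics; vscalef computes a * 2^floor(b) per element, and which encoding is picked is up to the backend:

    #include <immintrin.h>

    /* Packed: each lane of a is scaled by 2^floor(b); expected to use vscalefps. */
    __m512 scale_packed(__m512 a, __m512 b) {
        return _mm512_scalef_ps(a, b);
    }

    /* Scalar: only the low lane is scaled, upper lanes come from a;
       expected to use the newly added vscalefss form. */
    __m128 scale_scalar(__m128 a, __m128 b) {
        return _mm_scalef_ss(a, b);
    }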
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
@@ -3586,7 +3888,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
+ (OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))))>,
EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
@@ -3748,12 +4050,12 @@ multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
VTInfo.info256>, EVEX_V256;
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>,
- avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
+ avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>, EVEX_V128;
}
}
-multiclass avx512_shift_rmi_w<bits<8> opcw,
+multiclass avx512_shift_rmi_w<bits<8> opcw,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
@@ -3846,6 +4148,27 @@ multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
avx512vl_i64_info>, VEX_W;
}
+// Use the 512-bit version to implement the 128/256-bit forms in the NoVLX case.
+multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
+ let Predicates = [HasBWI, NoVLX] in {
+ def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
+ (_.info256.VT _.info256.RC:$src2))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME#"WZrr")
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
+ sub_ymm)>;
+
+ def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
+ (_.info128.VT _.info128.RC:$src2))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME#"WZrr")
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
+ (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
+ sub_xmm)>;
+ }
+}
+
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
@@ -3861,11 +4184,14 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
- avx512_var_shift_w<0x12, "vpsllvw", shl>;
+ avx512_var_shift_w<0x12, "vpsllvw", shl>,
+ avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
- avx512_var_shift_w<0x11, "vpsravw", sra>;
+ avx512_var_shift_w<0x11, "vpsravw", sra>,
+ avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
- avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>,
+ avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
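The avx512_var_shift_w_lowering patterns above let 128/256-bit word-variable shifts be implemented with the 512-bit instruction when only AVX-512BW (no VL) is present, by inserting into and extracting from a ZMM register. A C sketch of the 512-bit form those patterns fall back to, assuming AVX-512BW; how narrower vectors get widened is internal to the patterns:

    #include <immintrin.h>

    /* vpsllvw: shift each 16-bit lane of a left by the per-lane count in c. */
    __m512i shift_left_words(__m512i a, __m512i c) {
        return _mm512_sllv_epi16(a, c);
    }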
@@ -3916,19 +4242,77 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, avx512vl_f64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+//===----------------------------------------------------------------------===//
+// AVX-512 - VPERMIL
+//===----------------------------------------------------------------------===//
+multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
+ defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1,
+ (Ctrl.VT Ctrl.RC:$src2)))>,
+ T8PD, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode
+ _.RC:$src1,
+ (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
+ T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.VT (OpNode
+ _.RC:$src1,
+ (Ctrl.VT (X86VBroadcast
+ (Ctrl.ScalarLdFrag addr:$src2)))))>,
+ T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
+ }//let mayLoad = 1
+}
+
+multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
+ AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512,
+ Ctrl.info512>, EVEX_V512;
+ }
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128,
+ Ctrl.info128>, EVEX_V128;
+ defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256,
+ Ctrl.info256>, EVEX_V256;
+ }
+}
+
+multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
+ AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
+
+ defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
+ defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
+ X86VPermilpi, _>,
+ EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
+}
+
+defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
+ avx512vl_i32_info>;
+defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
+ avx512vl_i64_info>, VEX_W;
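VPERMILPS/VPERMILPD now get both the immediate and the variable-control EVEX forms from one avx512_permil multiclass. A C sketch of the two 512-bit flavours, assuming AVX-512F; the intrinsics below are the conventional way to reach these instructions:

    #include <immintrin.h>

    /* Immediate form: permute 32-bit lanes within each 128-bit chunk. */
    __m512 permute_imm(__m512 a) {
        return _mm512_permute_ps(a, 0x1B);   /* reverse the 4 lanes of every 128-bit block */
    }

    /* Variable form: per-lane selectors come from an integer vector. */
    __m512 permute_var(__m512 a, __m512i ctrl) {
        return _mm512_permutevar_ps(a, ctrl);
    }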
//===----------------------------------------------------------------------===//
// AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
//===----------------------------------------------------------------------===//
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
- X86PShufd, avx512vl_i32_info>,
+ X86PShufd, avx512vl_i32_info>,
EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
- X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
+ X86PShufhw>, EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
- X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
-
+ X86PShuflw>, EVEX, AVX512XDIi8Base;
+
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
let Predicates = [HasBWI] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
@@ -3942,55 +4326,6 @@ multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
//===----------------------------------------------------------------------===//
-// AVX-512 - MOVDDUP
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
- X86MemOperand x86memop, PatFrag memop_frag> {
-def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
-def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst,
- (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
-}
-
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPZrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
-// Replicate Single FP - MOVSHDUP and MOVSLDUP
-//===---------------------------------------------------------------------===//
-multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
- ValueType vt, RegisterClass RC, PatFrag mem_frag,
- X86MemOperand x86memop> {
- def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
- let mayLoad = 1 in
- def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
-}
-
-defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movshdup (loadv16i32 addr:$src))),
- (VMOVSHDUPZrm addr:$src)>;
-def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movsldup (loadv16i32 addr:$src))),
- (VMOVSLDUPZrm addr:$src)>;
-
-//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
//===----------------------------------------------------------------------===//
def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
@@ -4017,6 +4352,115 @@ let Predicates = [HasAVX512] in {
}
//===----------------------------------------------------------------------===//
+// VMOVHPS/PD VMOVLPS Instructions
+// All patterns were taken from the SSE implementation.

+//===----------------------------------------------------------------------===//
+multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayLoad = 1 in
+ def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1,
+ (_.VT (bitconvert
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
+ IIC_SSE_MOV_LH>, EVEX_4V;
+}
+
+defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
+defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
+ v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
+defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
+
+let Predicates = [HasAVX512] in {
+ // VMOVHPS patterns
+ def : Pat<(X86Movlhps VR128X:$src1,
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
+ (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(X86Movlhps VR128X:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVHPD patterns
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
+ (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
+ (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVLPS patterns
+ def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
+ (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
+ (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
+ // VMOVLPD patterns
+ def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+ def : Pat<(v2f64 (X86Movsd VR128X:$src1,
+ (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
+}
+
+let mayStore = 1 in {
+def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
+ (bc_v2f64 (v4f32 VR128X:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<32, CD8VT2>;
+def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<32, CD8VT2>;
+def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
+ (ins f64mem:$dst, VR128X:$src),
+ "vmovlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128X:$src),
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>,
+ EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+}
+let Predicates = [HasAVX512] in {
+ // VMOVHPD patterns
+ def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
+ (iPTR 0))), addr:$dst),
+ (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
+ // VMOVLPS patterns
+ def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
+ def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1),
+ (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
+ // VMOVLPD patterns
+ def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
+ def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
+ addr:$src1),
+ (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
+}
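These 128-bit load/store forms mirror the classic SSE high/low packed moves under EVEX. A C sketch with the long-standing SSE/SSE2 intrinsics, on the assumption that an AVX-512VL-enabled compiler may select the VMOVHPSZ128/VMOVHPDZ128 encodings defined above:

    #include <immintrin.h>

    /* Load 64 bits from memory into the high half of an XMM register (movhps). */
    __m128 load_high(__m128 v, const __m64 *p) {
        return _mm_loadh_pi(v, p);
    }

    /* Store the high double of an XMM register to memory (movhpd store form). */
    void store_high(double *p, __m128d v) {
        _mm_storeh_pd(p, v);
    }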
+//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
//
@@ -4034,7 +4478,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
- AVX512FMA3Base;
+ AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -4435,50 +4879,55 @@ def : Pat<(f64 (uint_to_fp GR64:$src)),
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from float/double to integer
//===----------------------------------------------------------------------===//
-multiclass avx512_cvt_s_int<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
- string asm> {
-let hasSideEffects = 0 in {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG,
- Requires<[HasAVX512]>;
- let mayLoad = 1 in
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG,
- Requires<[HasAVX512]>;
-} // hasSideEffects = 0
+multiclass avx512_cvt_s_int_round<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int,
+ Operand memop, ComplexPattern mem_cpat, string asm> {
+ let hasSideEffects = 0, Predicates = [HasAVX512] in {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG;
+ def rb : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
+ !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), []>,
+ EVEX, VEX_LIG, EVEX_B, EVEX_RC;
+ let mayLoad = 1 in
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG;
+ } // hasSideEffects = 0, Predicates = [HasAVX512]
}
-let Predicates = [HasAVX512] in {
+
// Convert float/double to signed/unsigned int 32/64
-defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
+defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si,
ssmem, sse_load_f32, "cvtss2si">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64,
+defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
+ int_x86_sse_cvtss2si64,
ssmem, sse_load_f32, "cvtss2si">,
XS, VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi,
+defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32,
+ int_x86_avx512_cvtss2usi,
ssmem, sse_load_f32, "cvtss2usi">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
int_x86_avx512_cvtss2usi64, ssmem,
sse_load_f32, "cvtss2usi">, XS, VEX_W,
EVEX_CD8<32, CD8VT1>;
-defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
+defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si,
sdmem, sse_load_f64, "cvtsd2si">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64,
+defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64,
+ int_x86_sse2_cvtsd2si64,
sdmem, sse_load_f64, "cvtsd2si">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi,
+defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32,
+ int_x86_avx512_cvtsd2usi,
sdmem, sse_load_f64, "cvtsd2usi">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64,
+defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64,
int_x86_avx512_cvtsd2usi64, sdmem,
sse_load_f64, "cvtsd2usi">, XD, VEX_W,
EVEX_CD8<64, CD8VT1>;
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1 , Predicates = [HasAVX512] in {
defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
SSE_CVT_Scalar, 0>, XS, EVEX_4V;
@@ -4495,121 +4944,170 @@ let isCodeGenOnly = 1 in {
defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
SSE_CVT_Scalar, 0>, XD, EVEX_4V;
-} // isCodeGenOnly = 1
+} // isCodeGenOnly = 1, Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation
-let isCodeGenOnly = 1 in {
- defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si,
- ssmem, sse_load_f32, "cvttss2si">,
- XS, EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
- int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
- "cvttss2si">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si,
- sdmem, sse_load_f64, "cvttsd2si">, XD,
- EVEX_CD8<64, CD8VT1>;
- defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64,
- int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
- "cvttsd2si">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
- defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
- int_x86_avx512_cvttss2usi, ssmem, sse_load_f32,
- "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
- int_x86_avx512_cvttss2usi64, ssmem,
- sse_load_f32, "cvttss2usi">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
- defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32,
- int_x86_avx512_cvttsd2usi,
- sdmem, sse_load_f64, "cvttsd2usi">, XD,
- EVEX_CD8<64, CD8VT1>;
- defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64,
- int_x86_avx512_cvttsd2usi64, sdmem,
- sse_load_f64, "cvttsd2usi">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
-} // isCodeGenOnly = 1
-
-multiclass avx512_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
+ X86VectorVTInfo _DstRC, SDNode OpNode,
+ SDNode OpNodeRnd>{
+let Predicates = [HasAVX512] in {
+ def rr : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (OpNode SrcRC:$src))]>, EVEX;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
+ def rb : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+ !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+ []>, EVEX, EVEX_B;
+ def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.MemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>, EVEX;
-}
-
-defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem,
- loadf32, "cvttss2si">, XS,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem,
- loadf32, "cvttss2usi">, XS,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem,
- loadf32, "cvttss2si">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem,
- loadf32, "cvttss2usi">, XS, VEX_W,
- EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem,
- loadf64, "cvttsd2si">, XD,
- EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem,
- loadf64, "cvttsd2usi">, XD,
- EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem,
- loadf64, "cvttsd2si">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem,
- loadf64, "cvttsd2usi">, XD, VEX_W,
- EVEX_CD8<64, CD8VT1>;
+ [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
+ EVEX;
+
+ let isCodeGenOnly = 1,hasSideEffects = 0 in {
+ def rr_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src,
+ (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
+ def rb_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+ [(set _DstRC.RC:$dst, (OpNodeRnd _SrcRC.RC:$src,
+ (i32 FROUND_NO_EXC)))]>,
+ EVEX,VEX_LIG , EVEX_B;
+ let mayLoad = 1 in
+ def rm_Int : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
+ (ins _SrcRC.MemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ []>, EVEX, VEX_LIG;
+
+ } // isCodeGenOnly = 1, hasSideEffects = 0
+} //HasAVX512
+}
+
+
+defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i32x_info,
+ fp_to_sint,X86cvttss2IntRnd>,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info,
+ fp_to_sint,X86cvttss2IntRnd>,
+ VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info,
+ fp_to_sint,X86cvttsd2IntRnd>,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info,
+ fp_to_sint,X86cvttsd2IntRnd>,
+ VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info,
+ fp_to_uint,X86cvttss2UIntRnd>,
+ XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info,
+ fp_to_uint,X86cvttss2UIntRnd>,
+ XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info,
+ fp_to_uint,X86cvttsd2UIntRnd>,
+ XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info,
+ fp_to_uint,X86cvttsd2UIntRnd>,
+ XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+let Predicates = [HasAVX512] in {
+ def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
+ (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
+ (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
+ (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+ def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
+ (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+
} // HasAVX512
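The truncating scalar converts now come from avx512_cvt_s_all, which adds the {sae} register form and keeps the _Int variants used by the intrinsic patterns just above. A C sketch, assuming AVX-512F; _MM_FROUND_NO_EXC is the standard suppress-all-exceptions control:

    #include <immintrin.h>

    /* Plain truncating convert; expected to use vcvttss2si. */
    int trunc_to_int(__m128 x) {
        return _mm_cvttss_si32(x);
    }

    /* Same conversion with floating-point exceptions suppressed ({sae} form). */
    int trunc_to_int_sae(__m128 x) {
        return _mm_cvtt_roundss_si32(x, _MM_FROUND_NO_EXC);
    }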
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in {
-def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
- (ins FR32X:$src1, FR32X:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
- (ins FR32X:$src1, f32mem:$src2),
- "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
- EVEX_CD8<32, CD8VT1>;
-
-// Convert scalar double to scalar single
-def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
- (ins FR64X:$src1, FR64X:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
- (ins FR64X:$src1, f64mem:$src2),
- "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX_4V, VEX_LIG, VEX_W,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
-}
-
-def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
- Requires<[HasAVX512]>;
-def : Pat<(fextend (loadf32 addr:$src)),
- (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
-
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNode> {
+ defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2)))>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+ defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (_Src.VT (scalar_to_vector
+ (_Src.ScalarLdFrag addr:$src2)))))>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+}
+
+// Scalar Conversion with SAE - suppress all exceptions
+multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2),
+ (i32 FROUND_NO_EXC)))>,
+ EVEX_4V, VEX_LIG, EVEX_B;
+}
+
+// Scalar Conversion with rounding control (RC)
+multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
+ EVEX_B, EVEX_RC;
+}
+multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, X86VectorVTInfo _src,
+ X86VectorVTInfo _dst> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+ avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
+ OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>,
+ EVEX_V512, XD;
+ }
+}
+
+multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, X86VectorVTInfo _src,
+ X86VectorVTInfo _dst> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+ avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
+ EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
+ }
+}
+defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround,
+ X86froundRnd, f64x_info, f32x_info>;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext,
+ X86fpextRnd,f32x_info, f64x_info >;
+
+def : Pat<(f64 (fextend FR32X:$src)),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
+ (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+ Requires<[HasAVX512]>;
+def : Pat<(f64 (fextend (loadf32 addr:$src))),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ Requires<[HasAVX512]>;
+
+def : Pat<(f64 (extloadf32 addr:$src)),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[HasAVX512, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
- Requires<[HasAVX512, OptForSpeed]>;
+def : Pat<(f64 (extloadf32 addr:$src)),
+ (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+ Requires<[HasAVX512, OptForSpeed]>;
-def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
+def : Pat<(f32 (fround FR64X:$src)),
+ (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
+ (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
Requires<[HasAVX512]>;
-
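The scalar float/double converts are now expressed through the maskable multiclasses, which also add rounding-control (vcvtsd2ss) and SAE (vcvtss2sd) register forms. A C sketch of those explicit-rounding variants, assuming the AVX-512F intrinsics:

    #include <immintrin.h>

    /* Narrow the low double of b into the low float of a, rounding toward zero. */
    __m128 narrow_rz(__m128 a, __m128d b) {
        return _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    }

    /* Widen the low float of b into the low double of a, exceptions suppressed. */
    __m128d widen_sae(__m128d a, __m128 b) {
        return _mm_cvt_roundss_sd(a, b, _MM_FROUND_NO_EXC);
    }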
//===----------------------------------------------------------------------===//
// AVX-512 Vector convert from signed/unsigned integer to float/double
// and from float/double to signed/unsigned integer
@@ -4992,7 +5490,7 @@ defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
-let Predicates = [NoVLX] in {
+let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
@@ -5024,40 +5522,102 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_cvtph2ps<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop> {
- def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}",
- []>, EVEX;
- let hasSideEffects = 0, mayLoad = 1 in
- def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins x86memop:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX;
-}
-
-multiclass avx512_cvtps2ph<RegisterClass destRC, RegisterClass srcRC,
- X86MemOperand x86memop> {
- def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst),
- (ins srcRC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, EVEX;
- let hasSideEffects = 0, mayStore = 1 in
- def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
- (ins x86memop:$dst, srcRC:$src1, i32u8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX;
+multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ X86MemOperand x86memop, PatFrag ld_frag> {
+ defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
+ "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT _src.RC:$src),
+ (i32 FROUND_CURRENT))>, T8PD;
+ let hasSideEffects = 0, mayLoad = 1 in {
+ defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
+ "vcvtph2ps", "$src", "$src",
+ (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
+ (i32 FROUND_CURRENT))>, T8PD;
+ }
}
-defm VCVTPH2PSZ : avx512_cvtph2ps<VR512, VR256X, f256mem>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
-defm VCVTPS2PHZ : avx512_cvtps2ph<VR256X, VR512, f256mem>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
+multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+ defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
+ "vcvtph2ps", "{sae}, $src", "$src, {sae}",
+ (X86cvtph2ps (_src.VT _src.RC:$src),
+ (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
-def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src),
- imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))),
- (VCVTPS2PHZrr VR512:$src, imm:$rc)>;
+}
-def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src),
- (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))),
- (VCVTPH2PSZrr VR256X:$src)>;
+let Predicates = [HasAVX512] in {
+ defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
+ avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
+ let Predicates = [HasVLX] in {
+ defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
+ loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
+ loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
+ }
+}
+
+multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
+ X86MemOperand x86memop> {
+ defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph", "$src2, $src1", "$src1, $src2",
+ (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>, AVX512AIi8Base;
+ let hasSideEffects = 0, mayStore = 1 in {
+ def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2), (i32 FROUND_CURRENT) )),
+ addr:$dst)]>;
+ def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
+ (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ []>, EVEX_K;
+ }
+}
+multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
+ defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
+ (ins _src.RC:$src1, i32u8imm:$src2),
+ "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}",
+ (X86cvtps2ph (_src.VT _src.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base;
+}
+let Predicates = [HasAVX512] in {
+ defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
+ avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
+ let Predicates = [HasVLX] in {
+ defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
+ EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f128mem>,
+ EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
+ }
+}
+
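The half-precision converts are rewritten on top of AVX512_maskable, gaining masked, VLX and SAE forms. A C sketch of the 512-bit pair, assuming AVX-512F:

    #include <immintrin.h>

    /* 16 half-precision values -> 16 floats (vcvtph2ps). */
    __m512 halves_to_floats(__m256i h) {
        return _mm512_cvtph_ps(h);
    }

    /* 16 floats -> 16 half-precision values (vcvtps2ph),
       round to nearest with exceptions suppressed. */
    __m256i floats_to_halves(__m512 f) {
        return _mm512_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    }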
+// Unordered/Ordered scalar fp compare with SAE and set EFLAGS
+multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode,
+ string OpcodeStr> {
+ def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
+ !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
+ [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2,
+ (i32 FROUND_NO_EXC)))],
+ IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
+ Sched<[WriteFAdd]>;
+}
+
+let Defs = [EFLAGS], Predicates = [HasAVX512] in {
+ defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">,
+ AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
+ defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">,
+ AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">,
+ AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
+ defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">,
+ AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
+}
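avx512_ord_cmp_sae adds the {sae} variants of the scalar ordered/unordered compares that set EFLAGS. A C sketch, assuming AVX-512F; _mm_comi_round_ss takes a comparison predicate plus the SAE control:

    #include <immintrin.h>

    /* Compare low floats for greater-or-equal, ordered, non-signaling (_CMP_GE_OQ).
       Expected to use the vcomiss {sae} form. */
    int ge_low_float(__m128 a, __m128 b) {
        return _mm_comi_round_ss(a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC);
    }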
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
@@ -5067,10 +5627,10 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
"ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
let Pattern = []<dag> in {
- defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load,
+ defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
"comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
- defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load,
+ defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
"comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
@@ -5092,50 +5652,31 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
}
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
-multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop> {
- let hasSideEffects = 0 in {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
+multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let hasSideEffects = 0, AddedComplexity = 20 , Predicates = [HasAVX512] in {
+ defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, EVEX_4V;
+ defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
}
}
}
-defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>,
- EVEX_CD8<32, CD8VT1>;
-defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>,
- VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>,
- EVEX_CD8<32, CD8VT1>;
-defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>,
- VEX_W, EVEX_CD8<64, CD8VT1>;
-
-def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1),
- (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
- (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1),
- (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
- (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
-
-def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1),
- (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X),
- (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>;
-
-def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1),
- (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))),
- (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X),
- (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>;
+defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
+ EVEX_CD8<32, CD8VT1>, T8PD;
+defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
+ VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
+defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
+ EVEX_CD8<32, CD8VT1>, T8PD;
+defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
+ VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
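The scalar 14-bit-precision reciprocal and reciprocal-square-root estimates are rebuilt as maskable scalar operations, replacing the old FR32X/FR64X forms and the hand-written intrinsic patterns removed above. A C sketch, assuming AVX-512F:

    #include <immintrin.h>

    /* Low-lane ~14-bit reciprocal estimate (vrcp14ss); upper lanes copied from a. */
    __m128 recip_low(__m128 a, __m128 b) {
        return _mm_rcp14_ss(a, b);
    }

    /* Low-lane ~14-bit reciprocal square-root estimate (vrsqrt14ss). */
    __m128 rsqrt_low(__m128 a, __m128 b) {
        return _mm_rsqrt14_ss(a, b);
    }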
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5183,20 +5724,6 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
-def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
- (VRSQRT14PSZr VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VRSQRT14PDZr VR512:$src)>;
-
-def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
- (VRCP14PSZr VR512:$src)>;
-def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VRCP14PDZr VR512:$src)>;
-
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode> {
@@ -5232,6 +5759,8 @@ let hasSideEffects = 0, Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
}
+
+defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
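VGETEXP is folded into the same scalar ERI-style multiclass as the 28-bit approximations; it extracts floor(log2(|x|)) of the low element as a float. A rough C sketch, assuming the usual intrinsic names (vgetexpss is AVX-512F, vrcp28ss is AVX-512ER); neither name comes from this patch:

    #include <immintrin.h>

    /* Low element gets floor(log2(|b|)); upper elements are taken from a. */
    __m128 exponent_ss(__m128 a, __m128 b) { return _mm_getexp_ss(a, b); }  /* vgetexpss */
    /* 28-bit-accurate reciprocal approximation of the low element. */
    __m128 recip28_ss(__m128 a, __m128 b) { return _mm_rcp28_ss(a, b); }    /* vrcp28ss  */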
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -5322,67 +5851,6 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
}
}
-multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
- Intrinsic F32Int, Intrinsic F64Int,
- OpndItins itins_s, OpndItins itins_d> {
- def SSZr : SI<opc, MRMSrcReg, (outs FR32X:$dst),
- (ins FR32X:$src1, FR32X:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], itins_s.rr>, XS, EVEX_4V;
- let isCodeGenOnly = 1 in
- def SSZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F32Int VR128X:$src1, VR128X:$src2))],
- itins_s.rr>, XS, EVEX_4V;
- let mayLoad = 1 in {
- def SSZm : SI<opc, MRMSrcMem, (outs FR32X:$dst),
- (ins FR32X:$src1, f32mem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [], itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
- let isCodeGenOnly = 1 in
- def SSZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, ssmem:$src2),
- !strconcat(OpcodeStr,
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F32Int VR128X:$src1, sse_load_f32:$src2))],
- itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
- }
- def SDZr : SI<opc, MRMSrcReg, (outs FR64X:$dst),
- (ins FR64X:$src1, FR64X:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- XD, EVEX_4V, VEX_W;
- let isCodeGenOnly = 1 in
- def SDZr_Int : SIi8<opc, MRMSrcReg, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F64Int VR128X:$src1, VR128X:$src2))],
- itins_s.rr>, XD, EVEX_4V, VEX_W;
- let mayLoad = 1 in {
- def SDZm : SI<opc, MRMSrcMem, (outs FR64X:$dst),
- (ins FR64X:$src1, f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
- let isCodeGenOnly = 1 in
- def SDZm_Int : SIi8<opc, MRMSrcMem, (outs VR128X:$dst),
- (ins VR128X:$src1, sdmem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
- XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
- }
-}
-
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
@@ -5416,93 +5884,77 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
}
+multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
+ string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
+
+ defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i32 FROUND_CURRENT))>;
+ let mayLoad = 1 in
+ defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src2))),
+ (i32 FROUND_CURRENT))>;
+
+ defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (OpNodeRnd (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i32 imm:$rc))>,
+ EVEX_B, EVEX_RC;
+
+ let isCodeGenOnly = 1 in {
+ def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+
+ let mayLoad = 1 in
+ def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
+ }
+
+ def : Pat<(_.EltVT (OpNode _.FRC:$src)),
+ (!cast<Instruction>(NAME#SUFF#Zr)
+ (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
+
+ def : Pat<(_.EltVT (OpNode (load addr:$src))),
+ (!cast<Instruction>(NAME#SUFF#Zm)
+ (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>;
+}
+
+multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
+ defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt,
+ X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
+ defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt,
+ X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
+}
+
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
-defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
- int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
- SSE_SQRTSS, SSE_SQRTSD>;
+defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
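The rewritten scalar sqrt multiclass keeps plain FR32X/FR64X forms for selecting fsqrt and adds the masked and embedded-rounding VR128X variants. The kind of source code these patterns cover looks like the following (a sketch using well-known intrinsics and libm calls, nothing introduced here):

    #include <immintrin.h>
    #include <math.h>

    float   scalar_sqrt(float x)   { return sqrtf(x); }            /* selects vsqrtss */
    __m512d packed_sqrt(__m512d v) { return _mm512_sqrt_pd(v); }   /* selects vsqrtpd */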
let Predicates = [HasAVX512] in {
- def : Pat<(f32 (fsqrt FR32X:$src)),
- (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
- def : Pat<(f32 (fsqrt (load addr:$src))),
- (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[OptForSize]>;
- def : Pat<(f64 (fsqrt FR64X:$src)),
- (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>;
- def : Pat<(f64 (fsqrt (load addr:$src))),
- (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>,
- Requires<[OptForSize]>;
-
def : Pat<(f32 (X86frsqrt FR32X:$src)),
- (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
- (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
-
def : Pat<(f32 (X86frcp FR32X:$src)),
- (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
+ (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
def : Pat<(f32 (X86frcp (load addr:$src))),
- (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[OptForSize]>;
-
- def : Pat<(int_x86_sse_sqrt_ss VR128X:$src),
- (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128X:$src, FR32)),
- VR128X)>;
- def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
- (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
-
- def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src),
- (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS VR128X:$src, FR64)),
- VR128X)>;
- def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
- (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;
-}
-
-
-multiclass avx512_rndscale<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, RegisterClass RC,
- PatFrag mem_frag, Domain d> {
-let ExeDomain = d in {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def r : AVX512AIi8<opc, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX;
-
- // Vector intrinsic operation, mem
- def m : AVX512AIi8<opc, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX;
-} // ExeDomain
}
-defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
- loadv16f32, SSEPackedSingle>, EVEX_V512,
- EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
- imm:$src2, (v16f32 VR512:$src1), (i16 -1),
- FROUND_CURRENT)),
- (VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
-
-
-defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
- loadv8f64, SSEPackedDouble>, EVEX_V512,
- VEX_W, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
- imm:$src2, (v8f64 VR512:$src1), (i8 -1),
- FROUND_CURRENT)),
- (VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
-
multiclass
avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
@@ -5510,20 +5962,20 @@ avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
- "{sae}, $src3, $src2, $src1", "$src1, $src2, $src3, {sae}",
- (_.VT (X86RndScale (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
+ (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
let mayLoad = 1 in
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (X86RndScale (_.VT _.RC:$src1),
+ (_.VT (X86RndScales (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3), (i32 FROUND_CURRENT)))>;
}
@@ -5568,109 +6020,238 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
-let Predicates = [HasAVX512] in {
-def : Pat<(v16f32 (ffloor VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x1))>;
-def : Pat<(v16f32 (fnearbyint VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0xC))>;
-def : Pat<(v16f32 (fceil VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x2))>;
-def : Pat<(v16f32 (frint VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x4))>;
-def : Pat<(v16f32 (ftrunc VR512:$src)),
- (VRNDSCALEPSZr VR512:$src, (i32 0x3))>;
-
-def : Pat<(v8f64 (ffloor VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x1))>;
-def : Pat<(v8f64 (fnearbyint VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0xC))>;
-def : Pat<(v8f64 (fceil VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x2))>;
-def : Pat<(v8f64 (frint VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x4))>;
-def : Pat<(v8f64 (ftrunc VR512:$src)),
- (VRNDSCALEPDZr VR512:$src, (i32 0x3))>;
-}
//-------------------------------------------------
// Integer truncate and extend operations
//-------------------------------------------------
-multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
- RegisterClass dstRC, RegisterClass srcRC,
- RegisterClass KRC, X86MemOperand x86memop> {
- def rr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
- (ins srcRC:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
+multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
+ X86MemOperand x86memop> {
+
+ defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
+ EVEX, T8XS;
+
+ // for intrinsic patter match
+ // for intrinsic pattern match
+ def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+ undef)),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
+ SrcInfo.RC:$src1)>;
+
+ def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+ DestInfo.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
+ SrcInfo.RC:$src1)>;
+
+ def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
+ DestInfo.KRCWM:$mask ,
+ SrcInfo.RC:$src1)>;
+
+ let mayStore = 1 in {
+ def mr : AVX512XS8I<opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst |$dst, $src}",
[]>, EVEX;
- def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
- (ins KRC:$mask, srcRC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
+ OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
[]>, EVEX, EVEX_K;
+ }//mayStore = 1
+}
- def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
- (ins KRC:$mask, srcRC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
+multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
+ X86VectorVTInfo DestInfo,
+ PatFrag truncFrag, PatFrag mtruncFrag > {
- def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>, EVEX;
+ def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
+ addr:$dst, SrcInfo.RC:$src)>;
- def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
- (ins x86memop:$dst, KRC:$mask, srcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
+ def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
+ (SrcInfo.VT SrcInfo.RC:$src)),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
+ addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
+}
+
+multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
+ X86VectorVTInfo DestInfo, string sat > {
+
+ def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
+ DestInfo.Suffix#"_mem_"#SrcInfo.Size)
+ addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
+ (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
+ (SrcInfo.VT SrcInfo.RC:$src))>;
+ def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
+ DestInfo.Suffix#"_mem_"#SrcInfo.Size)
+ addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
+ (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
+ (SrcInfo.VT SrcInfo.RC:$src))>;
}
-defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
-defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM,
- i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>;
-defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM,
- i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM,
- i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM,
- i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>;
-defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM,
- i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM,
- i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM,
- i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>;
-defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM,
- i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>;
-
-def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>;
-def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>;
-def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>;
-def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
-def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
-
-def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
- (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
-def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
- (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
-def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
- (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
-def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
- (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
+multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
+ X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
+ X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
+ X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
+ Predicate prd = HasAVX512>{
+
+ let Predicates = [HasVLX, prd] in {
+ defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
+ DestInfoZ128, x86memopZ128>,
+ avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
+ truncFrag, mtruncFrag>, EVEX_V128;
+
+ defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
+ DestInfoZ256, x86memopZ256>,
+ avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
+ truncFrag, mtruncFrag>, EVEX_V256;
+ }
+ let Predicates = [prd] in
+ defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
+ DestInfoZ, x86memopZ>,
+ avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
+ truncFrag, mtruncFrag>, EVEX_V512;
+}
+
+multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
+ X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
+ X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
+ X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
+
+ let Predicates = [HasVLX, prd] in {
+ defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
+ DestInfoZ128, x86memopZ128>,
+ avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
+ sat>, EVEX_V128;
+
+ defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
+ DestInfoZ256, x86memopZ256>,
+ avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
+ sat>, EVEX_V256;
+ }
+ let Predicates = [prd] in
+ defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
+ DestInfoZ, x86memopZ>,
+ avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
+ sat>, EVEX_V512;
+}
+
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
+ truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
+}
+multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
+ sat>, EVEX_CD8<8, CD8VO>;
+}
+
+multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
+ truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>;
+}
+multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info,
+ v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
+ sat>, EVEX_CD8<16, CD8VQ>;
+}
+
+multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
+ truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32, CD8VH>;
+}
+multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info,
+ v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
+ sat>, EVEX_CD8<32, CD8VH>;
+}
+
+multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
+ truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
+}
+multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info,
+ v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
+ sat>, EVEX_CD8<8, CD8VQ>;
+}
+
+multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
+ truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
+}
+multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info,
+ v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
+ sat>, EVEX_CD8<16, CD8VH>;
+}
+
+multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
+ v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
+ truncstorevi8, masked_truncstorevi8,HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
+ defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info,
+ v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
+ sat, HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
+defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>;
+defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
+defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>;
+defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
+defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>;
+defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
+defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>;
+defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
+defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>;
+defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
+defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
+defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
+
+let Predicates = [HasAVX512, NoVLX] in {
+def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
+ (v8i16 (EXTRACT_SUBREG
+ (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0),
+ VR256X:$src, sub_ymm)))), sub_xmm))>;
+def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
+ (v4i32 (EXTRACT_SUBREG
+ (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0),
+ VR256X:$src, sub_ymm)))), sub_xmm))>;
+}
+
+let Predicates = [HasBWI, NoVLX] in {
+def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
+ (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0),
+ VR256X:$src, sub_ymm))), sub_xmm))>;
+}
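The new vpmov truncate multiclasses cover the plain, signed-saturating ("s") and unsigned-saturating ("us") narrowing forms plus masked truncating stores. A small C sketch of the corresponding operations, assuming the standard AVX-512F intrinsic names (not part of this change):

    #include <immintrin.h>

    __m128i q2b(__m512i v)      { return _mm512_cvtepi64_epi8(v); }    /* vpmovqb   */
    __m128i q2b_sat(__m512i v)  { return _mm512_cvtsepi64_epi8(v); }   /* vpmovsqb  */
    __m128i q2b_usat(__m512i v) { return _mm512_cvtusepi64_epi8(v); }  /* vpmovusqb */

    /* Masked truncating store, matching the mrk/masked_truncstore patterns. */
    void q2b_store(void *p, __mmask8 k, __m512i v) {
      _mm512_mask_cvtepi64_storeu_epi8(p, k, v);                       /* vpmovqb to memory {k} */
    }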
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
@@ -5985,163 +6566,11 @@ defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-//===----------------------------------------------------------------------===//
-// VSHUFPS - VSHUFPD Operations
-
-multiclass avx512_shufp<RegisterClass RC, X86MemOperand x86memop,
- ValueType vt, string OpcodeStr, PatFrag mem_frag,
- Domain d> {
- def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, u8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
- (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
- EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>;
- def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, u8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
- (i8 imm:$src3))))], d, IIC_SSE_SHUFP>,
- EVEX_4V, Sched<[WriteShuffle]>;
-}
-
-defm VSHUFPSZ : avx512_shufp<VR512, f512mem, v16f32, "vshufps", loadv16f32,
- SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VSHUFPDZ : avx512_shufp<VR512, f512mem, v8f64, "vshufpd", loadv8f64,
- SSEPackedDouble>, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>;
-def : Pat<(v16i32 (X86Shufp VR512:$src1,
- (loadv16i32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>;
-
-def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))),
- (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>;
-def : Pat<(v8i64 (X86Shufp VR512:$src1,
- (loadv8i64 addr:$src2), (i8 imm:$imm))),
- (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>;
// Helper fragments to match sext vXi1 to vXiY.
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
-multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
- RegisterClass RC, RegisterClass KRC,
- X86MemOperand x86memop,
- X86MemOperand x86scalar_mop, string BrdcstStr> {
- let hasSideEffects = 0 in {
- def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst} |${dst}, $src}"),
- []>, EVEX;
- let mayLoad = 1 in
- def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, ${dst}|${dst}, $src}"),
- []>, EVEX;
- let mayLoad = 1 in
- def rmb : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", ${dst}|${dst}, ${src}", BrdcstStr, "}"),
- []>, EVEX, EVEX_B;
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in
- def rmbkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, x86scalar_mop:$src),
- !strconcat(OpcodeStr, "\t{${src}", BrdcstStr,
- ", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
- BrdcstStr, "}"),
- []>, EVEX, EVEX_KZ, EVEX_B;
-
- let Constraints = "$src1 = $dst" in {
- def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
- let mayLoad = 1 in
- def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
- []>, EVEX, EVEX_K;
- let mayLoad = 1 in
- def rmbk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, KRC:$mask, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", ${dst} {${mask}}|${dst} {${mask}}, ${src2}", BrdcstStr, "}"),
- []>, EVEX, EVEX_K, EVEX_B;
- }
- }
-}
-
-let Predicates = [HasCDI] in {
-defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM,
- i512mem, i32mem, "{1to16}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-
-defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM,
- i512mem, i64mem, "{1to8}">,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-}
-
-def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1,
- GR16:$mask),
- (VPCONFLICTDrrk VR512:$src1,
- (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
-
-def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
- GR8:$mask),
- (VPCONFLICTQrrk VR512:$src1,
- (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
-
-let Predicates = [HasCDI] in {
-defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
- i512mem, i32mem, "{1to16}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-
-defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
- i512mem, i64mem, "{1to8}">,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-}
-
-def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
- GR16:$mask),
- (VPLZCNTDrrk VR512:$src1,
- (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
-
-def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
- GR8:$mask),
- (VPLZCNTQrrk VR512:$src1,
- (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
-
-def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))),
- (VPLZCNTDrm addr:$src)>;
-def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
- (VPLZCNTDrr VR512:$src)>;
-def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))),
- (VPLZCNTQrm addr:$src)>;
-def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
- (VPLZCNTQrr VR512:$src)>;
-
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
@@ -6197,7 +6626,7 @@ defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set _.KRC:$dst, (trunc (_.VT _.RC:$src)))]>, EVEX;
+ [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>, EVEX;
}
multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
@@ -6230,7 +6659,7 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
let mayStore = 1 in {
@@ -6242,7 +6671,7 @@ multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(store (_.VT (vselect _.KRCWM:$mask,
+ [(store (_.VT (vselect _.KRCWM:$mask,
(_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
@@ -6272,7 +6701,7 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
+ (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
let mayLoad = 1 in
@@ -6302,6 +6731,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
EVEX, VEX_W;
+//handle instruction reg_vec1 = op(reg_vec,imm)
+// op(mem_vec,imm)
+// op(broadcast(eltVt),imm)
+//all instructions are created with FROUND_CURRENT
+multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>;
+ defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
+ "${src1}"##_.BroadcastStr##", $src2",
+ (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
+ (i32 imm:$src2),
+ (i32 FROUND_CURRENT))>, EVEX_B;
+ }
+}
+
+//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
+multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, X86VectorVTInfo _>{
+ defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix, "$src2,{sae}, $src1",
+ "$src1, {sae}, $src2",
+ (OpNode (_.VT _.RC:$src1),
+ (i32 imm:$src2),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
+}
+
+multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
+ AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
+ let Predicates = [prd] in {
+ defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
+ EVEX_V512;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128;
+ defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256;
+ }
+}
+
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
@@ -6309,49 +6794,60 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let mayLoad = 1 in {
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
+multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
+
+ defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+ (SrcInfo.VT SrcInfo.RC:$src2),
+ (i8 imm:$src3)))>;
+ let mayLoad = 1 in
+ defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
+ OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
+ (SrcInfo.VT (bitconvert
+ (SrcInfo.LdFrag addr:$src2))),
+ (i8 imm:$src3)))>;
+}
+
+//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
+// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
- OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2),
- (i8 imm:$src3))>;
- let mayLoad = 1 in {
- defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))),
- (i8 imm:$src3))>;
+ X86VectorVTInfo _>:
+ avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+
+ let mayLoad = 1 in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -6359,7 +6855,6 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B;
- }
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -6369,20 +6864,20 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let mayLoad = 1 in {
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_CURRENT))>;
let isAsmParserOnly = 1 in {
@@ -6398,18 +6893,25 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3,{sae}, $src2, $src1",
"$src1, $src2,{sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- (i8 imm:$src3),
+ (i32 imm:$src3),
(i32 FROUND_NO_EXC))>, EVEX_B;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
- defm NAME: avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _>;
+ defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
+ OpcodeStr, "$src3,{sae}, $src2, $src1",
+ "$src1, $src2,{sae}, $src3",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (i32 imm:$src3),
+ (i32 FROUND_NO_EXC))>, EVEX_B;
}
multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
@@ -6428,6 +6930,20 @@ multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
}
}
+multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
+ AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
+ let Predicates = [HasBWI] in {
+ defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
+ SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
+ }
+ let Predicates = [HasBWI, HasVLX] in {
+ defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
+ SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
+ defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
+ SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
+ }
+}
+
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode>{
let Predicates = [HasAVX512] in {
@@ -6447,6 +6963,14 @@ multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
}
}
+multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
+ bits<8> opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{
+ defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
+ opcPs, OpNode, prd>, EVEX_CD8<32, CD8VF>;
+ defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
+ opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W;
+}
+
defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd",
avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@@ -6461,6 +6985,14 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info,
0x55, X86VFixupimm, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
+ X86VReduce, HasDQI>, AVX512AIi8Base, EVEX;
+defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
+ X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX;
+defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
+ X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX;
+
+
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, HasDQI>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
@@ -6475,6 +7007,19 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86VRange, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
+ 0x57, X86Reduces, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
+ 0x57, X86Reduces, HasDQI>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+
+defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
+ 0x27, X86GetMants, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
+ 0x27, X86GetMants, HasAVX512>,
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
bits<8> opc, SDNode OpNode = X86Shuf128>{
@@ -6486,6 +7031,29 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
}
+let Predicates = [HasAVX512] in {
+def : Pat<(v16f32 (ffloor VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>;
+def : Pat<(v16f32 (fnearbyint VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
+def : Pat<(v16f32 (fceil VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>;
+def : Pat<(v16f32 (frint VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
+def : Pat<(v16f32 (ftrunc VR512:$src)),
+ (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>;
+
+def : Pat<(v8f64 (ffloor VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>;
+def : Pat<(v8f64 (fnearbyint VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
+def : Pat<(v8f64 (fceil VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>;
+def : Pat<(v8f64 (frint VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
+def : Pat<(v8f64 (ftrunc VR512:$src)),
+ (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>;
+}
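The Pat block above fixes the rndscale immediates used for the generic rounding nodes: 0x1 floor, 0x2 ceil, 0x3 trunc, 0x4 rint, 0xC nearbyint. The same constants can be passed explicitly through the roundscale intrinsic; a sketch assuming the standard _mm512_roundscale_ps name:

    #include <immintrin.h>

    __m512 floor16(__m512 x) { return _mm512_roundscale_ps(x, 0x1); }  /* matches the ffloor pattern */
    __m512 ceil16(__m512 x)  { return _mm512_roundscale_ps(x, 0x2); }  /* matches the fceil pattern  */
    __m512 trunc16(__m512 x) { return _mm512_roundscale_ps(x, 0x3); }  /* matches the ftrunc pattern */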
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
@@ -6496,31 +7064,51 @@ defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
- AVX512VLVectorVTInfo VTInfo_FP>{
+multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
AVX512AIi8Base, EVEX_4V;
- let isCodeGenOnly = 1 in {
- defm NAME#_FP: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0x03, X86VAlign>,
- AVX512AIi8Base, EVEX_4V;
- }
}
-defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>,
+defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>,
EVEX_CD8<32, CD8VF>;
-defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>,
+defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
+multiclass avx512_vpalign_lowering<X86VectorVTInfo _ , list<Predicate> p>{
+ let Predicates = p in
+ def NAME#_.VTName#rri:
+ Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rri)
+ _.RC:$src1, _.RC:$src2, imm:$imm)>;
+}
+
+multiclass avx512_vpalign_lowering_common<AVX512VLVectorVTInfo _>:
+ avx512_vpalign_lowering<_.info512, [HasBWI]>,
+ avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>,
+ avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>;
+
+defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
+ avx512vl_i8_info, avx512vl_i8_info>,
+ avx512_vpalign_lowering_common<avx512vl_i16_info>,
+ avx512_vpalign_lowering_common<avx512vl_i32_info>,
+ avx512_vpalign_lowering_common<avx512vl_f32_info>,
+ avx512_vpalign_lowering_common<avx512vl_i64_info>,
+ avx512_vpalign_lowering_common<avx512vl_f64_info>,
+ EVEX_CD8<8, CD8VF>;
+
+defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
+ avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
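vpalignr concatenates the matching 128-bit lanes of the two sources and byte-shifts the result, while vdbpsadbw computes double-block packed sums of absolute differences into word elements. A usage sketch with the commonly documented AVX-512BW intrinsic names (assumed, not defined in this file):

    #include <immintrin.h>

    /* Per 128-bit lane: concatenate a (high) with b (low), shift right by 5 bytes. */
    __m512i align5(__m512i a, __m512i b) { return _mm512_alignr_epi8(a, b, 5); }    /* vpalignr  */
    /* Double-block SAD of unsigned bytes; the immediate selects the 32-bit block offsets. */
    __m512i dbsad(__m512i a, __m512i b)  { return _mm512_dbsad_epu8(a, b, 0x94); }  /* vdbpsadbw */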
+
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1), OpcodeStr##_.Suffix,
+ (ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
let mayLoad = 1 in
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.MemOp:$src1), OpcodeStr##_.Suffix,
+ (ins _.MemOp:$src1), OpcodeStr,
"$src1", "$src1",
(_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
@@ -6531,7 +7119,7 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
let mayLoad = 1 in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.ScalarMemOp:$src1), OpcodeStr##_.Suffix,
+ (ins _.ScalarMemOp:$src1), OpcodeStr,
"${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr,
(_.VT (OpNode (X86VBroadcast
@@ -6568,15 +7156,16 @@ multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, Predicate prd> {
- defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr, OpNode, avx512vl_i64_info,
+ defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, avx512vl_i64_info,
prd>, VEX_W;
- defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr, OpNode, avx512vl_i32_info, prd>;
+ defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, avx512vl_i32_info,
+ prd>;
}
multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, Predicate prd> {
- defm W : avx512_unary_rm_vl<opc_w, OpcodeStr, OpNode, avx512vl_i16_info, prd>;
- defm B : avx512_unary_rm_vl<opc_b, OpcodeStr, OpNode, avx512vl_i8_info, prd>;
+ defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, avx512vl_i16_info, prd>;
+ defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, avx512vl_i8_info, prd>;
}
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
@@ -6598,3 +7187,332 @@ def : Pat<(xor
(bc_v8i64 (v8i1sextv8i64)),
(bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
(VPABSQZrr VR512:$src)>;
+
+multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
+
+ defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
+}
+
+defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
+defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
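vplzcnt and vpconflict now go through the generic unary multiclasses instead of the hand-written avx512_conflict class removed above, but they still select the AVX-512CD leading-zero-count and conflict-detection instructions. Minimal sketch with the standard intrinsics:

    #include <immintrin.h>

    __m512i lzcnt32(__m512i v)    { return _mm512_lzcnt_epi32(v); }     /* vplzcntd    */
    __m512i conflict32(__m512i v) { return _mm512_conflict_epi32(v); }  /* vpconflictd */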
+
+//===---------------------------------------------------------------------===//
+// Replicate Single FP - MOVSHDUP and MOVSLDUP
+//===---------------------------------------------------------------------===//
+multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
+ defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
+ HasAVX512>, XS;
+}
+
+defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
+defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - MOVDDUP
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_.VT (scalar_to_vector
+ (_.ScalarLdFrag addr:$src)))))>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VH>;
+}
+
+multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo> {
+
+ defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
+
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
+ EVEX_V128;
+ }
+}
+
+multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
+ defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode,
+ avx512vl_f64_info>, XD, VEX_W;
+}
+
+defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
+
+def : Pat<(X86Movddup (loadv2f64 addr:$src)),
+ (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
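The replicate/duplicate group (vmovshdup, vmovsldup, vmovddup) maps to the usual element-duplication intrinsics; a sketch, assuming the standard <immintrin.h> names:

    #include <immintrin.h>

    __m512  dup_odd(__m512 v)  { return _mm512_movehdup_ps(v); }  /* vmovshdup */
    __m512  dup_even(__m512 v) { return _mm512_moveldup_ps(v); }  /* vmovsldup */
    __m512d dup_low(__m512d v) { return _mm512_movedup_pd(v); }   /* vmovddup  */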
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>;
+defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>;
+
+defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasBWI>;
+defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasBWI>;
+defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasBWI>;
+defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasBWI>;
+
+defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasAVX512>;
+defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasAVX512>;
+defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
+ SSE_INTALU_ITINS_P, HasAVX512>;
+defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
+ SSE_INTALU_ITINS_P, HasAVX512>;
+
+//===----------------------------------------------------------------------===//
+// AVX-512 - Extract & Insert Integer Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let mayStore = 1 in
+ def mr : AVX512Ii8<opc, MRMDestMem, (outs),
+ (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(store (_.EltVT (trunc (assertzext (OpNode (_.VT _.RC:$src1),
+ imm:$src2)))),
+ addr:$dst)]>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VT1>;
+}
+
+multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
+ let Predicates = [HasBWI] in {
+ def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32orGR64:$dst,
+ (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
+ EVEX, TAPD;
+
+ defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
+ }
+}
+
+multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
+ let Predicates = [HasBWI] in {
+ def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32orGR64:$dst,
+ (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
+ EVEX, PD;
+
+ def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ EVEX, TAPD;
+
+ defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
+ }
+}
+
+multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
+ RegisterClass GRC> {
+ let Predicates = [HasDQI] in {
+ def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
+ (ins _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GRC:$dst,
+ (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
+ EVEX, TAPD;
+
+ let mayStore = 1 in
+ def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
+ (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(store (extractelt (_.VT _.RC:$src1),
+ imm:$src2),addr:$dst)]>,
+ EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD;
+ }
+}
+
+defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>;
+defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>;
+defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
+defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
+
+multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, PatFrag LdFrag> {
+ def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
+ OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set _.RC:$dst,
+ (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
+}
+
+multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, PatFrag LdFrag> {
+ let Predicates = [HasBWI] in {
+ def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
+ OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set _.RC:$dst,
+ (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V;
+
+ defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
+ }
+}
+
+multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _, RegisterClass GRC> {
+ let Predicates = [HasDQI] in {
+ def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
+ OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set _.RC:$dst,
+ (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
+ EVEX_4V, TAPD;
+
+ defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
+ _.ScalarLdFrag>, TAPD;
+ }
+}
+
+defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
+ extloadi8>, TAPD;
+defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
+ extloadi16>, PD;
+defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
+defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
+//===----------------------------------------------------------------------===//
+// VSHUFPS - VSHUFPD Operations
+//===----------------------------------------------------------------------===//
+multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
+ AVX512VLVectorVTInfo VTInfo_FP>{
+ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
+ EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
+ AVX512AIi8Base, EVEX_4V;
+}
+
+defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
+defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
+//===----------------------------------------------------------------------===//
+// AVX-512 - Byte shift Left/Right
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
+ Format MRMm, string OpcodeStr, X86VectorVTInfo _>{
+ def rr : AVX512<opc, MRMr,
+ (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;
+ let mayLoad = 1 in
+ def rm : AVX512<opc, MRMm,
+ (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst,(_.VT (OpNode
+ (_.LdFrag addr:$src1), (i8 imm:$src2))))]>;
+}
+
+multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
+ Format MRMm, string OpcodeStr, Predicate prd>{
+ let Predicates = [prd] in
+ defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
+ OpcodeStr, v8i64_info>, EVEX_V512;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
+ OpcodeStr, v4i64x_info>, EVEX_V256;
+ defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
+ OpcodeStr, v2i64x_info>, EVEX_V128;
+ }
+}
+defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
+ HasBWI>, AVX512PDIi8Base, EVEX_4V;
+defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
+ HasBWI>, AVX512PDIi8Base, EVEX_4V;
+
+
+multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
+ string OpcodeStr, X86VectorVTInfo _dst,
+ X86VectorVTInfo _src>{
+ def rr : AVX512BI<opc, MRMSrcReg,
+ (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _dst.RC:$dst,(_dst.VT
+ (OpNode (_src.VT _src.RC:$src1),
+ (_src.VT _src.RC:$src2))))]>;
+ let mayLoad = 1 in
+ def rm : AVX512BI<opc, MRMSrcMem,
+ (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _dst.RC:$dst,(_dst.VT
+ (OpNode (_src.VT _src.RC:$src1),
+ (_src.VT (bitconvert
+ (_src.LdFrag addr:$src2))))))]>;
+}
+
+multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
+ string OpcodeStr, Predicate prd> {
+ let Predicates = [prd] in
+ defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v8i64_info,
+ v64i8_info>, EVEX_V512;
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v4i64x_info,
+ v32i8x_info>, EVEX_V256;
+ defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v2i64x_info,
+ v16i8x_info>, EVEX_V128;
+ }
+}
+
+defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
+ HasBWI>, EVEX_4V;
+
+multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst" in {
+ defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
+                      OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.VT _.RC:$src3),
+ (i8 imm:$src4))>, AVX512AIi8Base, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
+                    OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.VT (bitconvert (_.LdFrag addr:$src3))),
+ (i8 imm:$src4))>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
+ OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
+ "$src2, ${src3}"##_.BroadcastStr##", $src4",
+ (OpNode (_.VT _.RC:$src1),
+ (_.VT _.RC:$src2),
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ (i8 imm:$src4))>, EVEX_B,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ }
+ }// Constraints = "$src1 = $dst"
+}
+
+multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512;
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128;
+ defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256;
+ }
+}
+
+defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
+defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
+
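A note on the VPTERNLOG{D,Q} definitions above: the u8imm operand ($src4) acts as a three-input truth table, where bit ((a << 2) | (b << 1) | c) of the immediate selects the result for source bits a ($src1), b ($src2) and c ($src3). A minimal C++ sketch for deriving such an immediate (the helper name ternlog_imm is illustrative, not part of LLVM):

    #include <cstdint>

    // Build a VPTERNLOG immediate from a ternary boolean function f(a, b, c),
    // where a, b and c are the corresponding bits of src1, src2 and src3.
    static uint8_t ternlog_imm(bool (*f)(bool, bool, bool)) {
      uint8_t imm = 0;
      for (int i = 0; i < 8; ++i) {
        bool a = (i >> 2) & 1, b = (i >> 1) & 1, c = i & 1;
        if (f(a, b, c))
          imm |= uint8_t(1u << i);  // result bit for this input combination
      }
      return imm;
    }

    // Example: f = (a & b) | c yields 0xEA; f = a & b & c yields 0x80.
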
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index 5e19ad4..1a2e786 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -615,14 +615,14 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
-def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
- Imm8 , i8imm , imm, i8imm , invalid_node,
+def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
+ Imm8, i8imm, imm8_su, i8imm, invalid_node,
0, OpSizeFixed, 0>;
def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
- Imm16, i16imm, imm, i16i8imm, i16immSExt8,
+ Imm16, i16imm, imm16_su, i16i8imm, i16immSExt8_su,
1, OpSize16, 0>;
def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
- Imm32, i32imm, imm, i32i8imm, i32immSExt8,
+ Imm32, i32imm, imm32_su, i32i8imm, i32immSExt8_su,
1, OpSize32, 0>;
def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
Imm32S, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
@@ -928,15 +928,22 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let hasSideEffects = 0;
}
-// BinOpAI_FF - Instructions like "adc %eax, %eax, imm", that implicitly define
+// BinOpAI_RFF - Instructions like "adc %eax, %eax, imm", that implicitly define
// and use EFLAGS.
-class BinOpAI_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- Register areg, string operands>
+class BinOpAI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg, string operands>
: BinOpAI<opcode, mnemonic, typeinfo, areg, operands,
IIC_BIN_CARRY_NONMEM> {
let Uses = [areg, EFLAGS];
}
+// BinOpAI_F - Instructions like "cmp %eax, %eax, imm", that imp-def EFLAGS.
+class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ Register areg, string operands>
+ : BinOpAI<opcode, mnemonic, typeinfo, areg, operands> {
+ let Defs = [EFLAGS];
+}
+
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
/// defined with "(set GPR:$dst, EFLAGS, (...".
///
@@ -1092,14 +1099,14 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // Uses = [EFLAGS], Defs = [EFLAGS]
- def NAME#8i8 : BinOpAI_FF<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI_FF<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def NAME#32i32 : BinOpAI_FF<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def NAME#64i32 : BinOpAI_FF<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ def NAME#8i8 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def NAME#16i16 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|ax, $src}">;
+ def NAME#32i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|eax, $src}">;
+ def NAME#64i32 : BinOpAI_RFF<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|rax, $src}">;
}
/// ArithBinOp_F - This is an arithmetic binary operator where the pattern is
@@ -1170,14 +1177,14 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // Defs = [EFLAGS]
- def NAME#8i8 : BinOpAI<BaseOpc4, mnemonic, Xi8 , AL,
- "{$src, %al|al, $src}">;
- def NAME#16i16 : BinOpAI<BaseOpc4, mnemonic, Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def NAME#32i32 : BinOpAI<BaseOpc4, mnemonic, Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def NAME#64i32 : BinOpAI<BaseOpc4, mnemonic, Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ def NAME#8i8 : BinOpAI_F<BaseOpc4, mnemonic, Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def NAME#16i16 : BinOpAI_F<BaseOpc4, mnemonic, Xi16, AX,
+ "{$src, %ax|ax, $src}">;
+ def NAME#32i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi32, EAX,
+ "{$src, %eax|eax, $src}">;
+ def NAME#64i32 : BinOpAI_F<BaseOpc4, mnemonic, Xi64, RAX,
+ "{$src, %rax|rax, $src}">;
}
@@ -1246,14 +1253,14 @@ let isCompare = 1 in {
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
} // Defs = [EFLAGS]
- def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL,
- "{$src, %al|al, $src}">;
- def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX,
- "{$src, %ax|ax, $src}">;
- def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX,
- "{$src, %eax|eax, $src}">;
- def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX,
- "{$src, %rax|rax, $src}">;
+ def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL,
+ "{$src, %al|al, $src}">;
+ def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX,
+ "{$src, %ax|ax, $src}">;
+ def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX,
+ "{$src, %eax|eax, $src}">;
+ def TEST64i32 : BinOpAI_F<0xA8, "test", Xi64, RAX,
+ "{$src, %rax|rax, $src}">;
} // isCompare
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
index 2056056..787f15b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -156,10 +156,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
Flags |= MachineMemOperand::MOLoad;
if (MCID.mayStore())
Flags |= MachineMemOperand::MOStore;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
- Flags, MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
+ MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
return addOffset(MIB.addFrameIndex(FI), Offset)
.addMemOperand(MMO);
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 315f213..c73c950 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
-// SetCC instructions.
+// CMOV instructions.
multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst",
isCommutable = 1, SchedRW = [WriteALU] in {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 7f850d6..5d7283f 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -132,26 +132,6 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
Requires<[In64BitMode]>;
}
-// The MSVC runtime contains an _ftol2 routine for converting floating-point
-// to integer values. It has a strange calling convention: the input is
-// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is
-// used as a temporary register. No other registers (aside from flags) are
-// touched.
-// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
-// variant is unnecessary.
-
-let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in {
- def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
- "# win32 fptoui",
- [(X86WinFTOL RFP32:$src)]>,
- Requires<[Not64BitMode]>;
-
- def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
- "# win32 fptoui",
- [(X86WinFTOL RFP64:$src)]>,
- Requires<[Not64BitMode]>;
-}
-
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
@@ -172,6 +152,29 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
}
+let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
+ isCodeGenOnly = 1, isReturn = 1 in {
+ def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>;
+
+ // CATCHRET needs a custom inserter for SEH.
+ let usesCustomInserter = 1 in
+ def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from),
+ "# CATCHRET",
+ [(catchret bb:$dst, bb:$from)]>;
+}
+
+let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
+def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>;
+
+// This instruction is responsible for re-establishing stack pointers after an
+// exception has been caught and we are rejoining normal control flow in the
+// parent function or funclet. It generally sets ESP and EBP, and optionally
+// ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us
+// elsewhere.
+let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in
+def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>;
+
let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
usesCustomInserter = 1 in {
def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
@@ -247,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1 in
+ isPseudo = 1, AddedComplexity = 20 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
@@ -259,6 +262,33 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
let AddedComplexity = 20;
}
+let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
+ AddedComplexity = 15 in {
+ // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
+ // which only require 3 bytes compared to MOV32ri which requires 5.
+ let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
+ def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 1)]>;
+ def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, -1)]>;
+ }
+
+ // MOV16ri is 4 bytes, so the instructions above are smaller.
+ def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
+ def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
+}
+
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
+// FIXME: Add itinerary class and Schedule.
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
+ [(set GR32:$dst, i32immSExt8:$src)]>,
+ Requires<[OptForMinSize]>;
+def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
+ [(set GR64:$dst, i64immSExt8:$src)]>,
+ Requires<[OptForMinSize, NotWin64WithoutFP]>;
+}
+
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
@@ -268,9 +298,9 @@ def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src),
"", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>;
// This 64-bit pseudo-move can be used for both a 64-bit constant that is
-// actually the zero-extension of a 32-bit constant, and for labels in the
+// actually the zero-extension of a 32-bit constant and for labels in the
// x86-64 small code model.
-def mov64imm32 : ComplexPattern<i64, 1, "SelectMOV64Imm32", [imm, X86Wrapper]>;
+def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [imm, X86Wrapper]>;
let AddedComplexity = 1 in
def : Pat<(i64 mov64imm32:$src),
@@ -509,6 +539,7 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in {
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
+ defm _FR128 : CMOVrr_PSEUDO<FR128, f128>;
defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
@@ -752,67 +783,111 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add",
/* The following multiclass tries to make sure that in code like
* x.store (immediate op x.load(acquire), release)
+ * and
+ * x.store (register op x.load(acquire), release)
* an operation directly on memory is generated instead of wasting a register.
* It is not automatic as atomic_store/load are only lowered to MOV instructions
* extremely late to prevent them from being accidentally reordered in the backend
* (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
*/
-multiclass RELEASE_BINOP_MI<string op> {
+multiclass RELEASE_BINOP_MI<SDNode op> {
def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#RELEASE_BINOP PSEUDO!",
- [(atomic_store_8 addr:$dst, (!cast<PatFrag>(op)
+ "#BINOP "#NAME#"8mi PSEUDO!",
+ [(atomic_store_8 addr:$dst, (op
(atomic_load_8 addr:$dst), (i8 imm:$src)))]>;
+ def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src),
+ "#BINOP "#NAME#"8mr PSEUDO!",
+ [(atomic_store_8 addr:$dst, (op
+ (atomic_load_8 addr:$dst), GR8:$src))]>;
// NAME#16 is not generated as 16-bit arithmetic instructions are considered
// costly and avoided as far as possible by this backend anyway
def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#RELEASE_BINOP PSEUDO!",
- [(atomic_store_32 addr:$dst, (!cast<PatFrag>(op)
+ "#BINOP "#NAME#"32mi PSEUDO!",
+ [(atomic_store_32 addr:$dst, (op
(atomic_load_32 addr:$dst), (i32 imm:$src)))]>;
+ def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
+ "#BINOP "#NAME#"32mr PSEUDO!",
+ [(atomic_store_32 addr:$dst, (op
+ (atomic_load_32 addr:$dst), GR32:$src))]>;
def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#RELEASE_BINOP PSEUDO!",
- [(atomic_store_64 addr:$dst, (!cast<PatFrag>(op)
+ "#BINOP "#NAME#"64mi32 PSEUDO!",
+ [(atomic_store_64 addr:$dst, (op
(atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>;
+ def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
+ "#BINOP "#NAME#"64mr PSEUDO!",
+ [(atomic_store_64 addr:$dst, (op
+ (atomic_load_64 addr:$dst), GR64:$src))]>;
+}
+let Defs = [EFLAGS] in {
+ defm RELEASE_ADD : RELEASE_BINOP_MI<add>;
+ defm RELEASE_AND : RELEASE_BINOP_MI<and>;
+ defm RELEASE_OR : RELEASE_BINOP_MI<or>;
+ defm RELEASE_XOR : RELEASE_BINOP_MI<xor>;
+  // Note: we don't deal with sub, because subtractions of constants are
+ // optimized into additions before this code can run.
+}
+
+// Same as above, but for floating-point.
+// FIXME: imm version.
+// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
+// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
+let usesCustomInserter = 1 in {
+multiclass RELEASE_FP_BINOP_MI<SDNode op> {
+ def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
+ "#BINOP "#NAME#"32mr PSEUDO!",
+ [(atomic_store_32 addr:$dst,
+ (i32 (bitconvert (op
+ (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
+ FR32:$src))))]>, Requires<[HasSSE1]>;
+ def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
+ "#BINOP "#NAME#"64mr PSEUDO!",
+ [(atomic_store_64 addr:$dst,
+ (i64 (bitconvert (op
+ (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
+ FR64:$src))))]>, Requires<[HasSSE2]>;
+}
+defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
+// FIXME: Add fsub, fmul, fdiv, ...
}
-defm RELEASE_ADD : RELEASE_BINOP_MI<"add">;
-defm RELEASE_AND : RELEASE_BINOP_MI<"and">;
-defm RELEASE_OR : RELEASE_BINOP_MI<"or">;
-defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">;
-// Note: we don't deal with sub, because substractions of constants are
-// optimized into additions before this code can run
multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"8m PSEUDO!",
[(atomic_store_8 addr:$dst, dag8)]>;
def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"16m PSEUDO!",
[(atomic_store_16 addr:$dst, dag16)]>;
def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"32m PSEUDO!",
[(atomic_store_32 addr:$dst, dag32)]>;
def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst),
- "#RELEASE_UNOP PSEUDO!",
+ "#UNOP "#NAME#"64m PSEUDO!",
[(atomic_store_64 addr:$dst, dag64)]>;
}
-defm RELEASE_INC : RELEASE_UNOP<
- (add (atomic_load_8 addr:$dst), (i8 1)),
- (add (atomic_load_16 addr:$dst), (i16 1)),
- (add (atomic_load_32 addr:$dst), (i32 1)),
- (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
-defm RELEASE_DEC : RELEASE_UNOP<
- (add (atomic_load_8 addr:$dst), (i8 -1)),
- (add (atomic_load_16 addr:$dst), (i16 -1)),
- (add (atomic_load_32 addr:$dst), (i32 -1)),
- (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+let Defs = [EFLAGS] in {
+ defm RELEASE_INC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 1)),
+ (add (atomic_load_16 addr:$dst), (i16 1)),
+ (add (atomic_load_32 addr:$dst), (i32 1)),
+ (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
+ defm RELEASE_DEC : RELEASE_UNOP<
+ (add (atomic_load_8 addr:$dst), (i8 -1)),
+ (add (atomic_load_16 addr:$dst), (i16 -1)),
+ (add (atomic_load_32 addr:$dst), (i32 -1)),
+ (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+}
/*
TODO: These don't work because the type inference of TableGen fails.
TODO: find a way to fix it.
-defm RELEASE_NEG : RELEASE_UNOP<
- (ineg (atomic_load_8 addr:$dst)),
- (ineg (atomic_load_16 addr:$dst)),
- (ineg (atomic_load_32 addr:$dst)),
- (ineg (atomic_load_64 addr:$dst))>;
+let Defs = [EFLAGS] in {
+ defm RELEASE_NEG : RELEASE_UNOP<
+ (ineg (atomic_load_8 addr:$dst)),
+ (ineg (atomic_load_16 addr:$dst)),
+ (ineg (atomic_load_32 addr:$dst)),
+ (ineg (atomic_load_64 addr:$dst))>;
+}
+// NOT doesn't set flags.
defm RELEASE_NOT : RELEASE_UNOP<
(not (atomic_load_8 addr:$dst)),
(not (atomic_load_16 addr:$dst)),
@@ -821,42 +896,42 @@ defm RELEASE_NOT : RELEASE_UNOP<
*/
def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV8mi PSEUDO!",
[(atomic_store_8 addr:$dst, (i8 imm:$src))]>;
def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV16mi PSEUDO!",
[(atomic_store_16 addr:$dst, (i16 imm:$src))]>;
def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV32mi PSEUDO!",
[(atomic_store_32 addr:$dst, (i32 imm:$src))]>;
def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "#RELEASE_MOV PSEUDO !",
+ "#RELEASE_MOV64mi32 PSEUDO!",
[(atomic_store_64 addr:$dst, i64immSExt32:$src)]>;
def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV8mr PSEUDO!",
[(atomic_store_8 addr:$dst, GR8 :$src)]>;
def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV16mr PSEUDO!",
[(atomic_store_16 addr:$dst, GR16:$src)]>;
def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV32mr PSEUDO!",
[(atomic_store_32 addr:$dst, GR32:$src)]>;
def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src),
- "#RELEASE_MOV PSEUDO!",
+ "#RELEASE_MOV64mr PSEUDO!",
[(atomic_store_64 addr:$dst, GR64:$src)]>;
def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV8rm PSEUDO!",
[(set GR8:$dst, (atomic_load_8 addr:$src))]>;
def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV16rm PSEUDO!",
[(set GR16:$dst, (atomic_load_16 addr:$src))]>;
def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV32rm PSEUDO!",
[(set GR32:$dst, (atomic_load_32 addr:$src))]>;
def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src),
- "#ACQUIRE_MOV PSEUDO!",
+ "#ACQUIRE_MOV64rm PSEUDO!",
[(set GR64:$dst, (atomic_load_64 addr:$src))]>;
//===----------------------------------------------------------------------===//
@@ -1077,11 +1152,11 @@ defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// zextload bool -> zextload byte
def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
-def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
-def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
+def : Pat<(zextloadi16i1 addr:$src), (AND16ri8 (MOVZX16rm8 addr:$src), (i16 1))>;
+def : Pat<(zextloadi32i1 addr:$src), (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1))>;
def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0),
- (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
+ (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
@@ -1298,7 +1373,6 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz
-let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
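As the RELEASE_BINOP_MI comment above describes, these pseudos exist so that an acquire load feeding an ALU operation whose result is stored back with release ordering can be selected as a single memory-operand instruction. A minimal C++ sketch of the source pattern (function and variable names are illustrative):

    #include <atomic>

    std::atomic<int> counter;

    // With the RELEASE_BINOP/RELEASE_UNOP pseudos, this load-op-store sequence
    // can be selected as one instruction operating directly on memory, e.g.
    // `addl $1, counter(%rip)` (or `incl` when inc/dec are not slow), instead
    // of going through a scratch register.
    void bump() {
      counter.store(counter.load(std::memory_order_acquire) + 1,
                    std::memory_order_release);
    }
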
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index 4cd5563..8c351a5 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -53,6 +53,19 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
"{l}ret{|f}q\t$amt", [], IIC_RET>, Requires<[In64BitMode]>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
"{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize16;
+
+  // The machine return-from-interrupt instructions. Sometimes we also need
+  // to perform a post-epilogue stack adjustment, so codegen emits the pseudo
+  // form, which expands to include an SP adjustment if necessary.
+ def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>,
+ OpSize16;
+ def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", [],
+ IIC_IRET>, OpSize32;
+ def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", [],
+ IIC_IRET>, Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in
+ def IRET : PseudoI<(outs), (ins i16imm:$adj), [(X86iret timm:$adj)]>;
+
}
// Unconditional branches.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
index 7cc3b59..fd800cf 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -15,13 +15,31 @@
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
-let Constraints = "$src1 = $dst" in {
+// For all FMA opcodes declared in fma3p_rm and fma3s_rm multiclasses defined
+// below, both the register and memory variants are commutable.
+// For the register form the commutable operands are 1, 2 and 3.
+// For the memory variant the folded operand must be operand 3. Thus,
+// in that case, only operands 1 and 2 can be swapped.
+// Commuting some of the operands may require an opcode change.
+// FMA*213*:
+// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes);
+// operands 1 and 3 (register forms only): *213* --> *231*;
+// operands 2 and 3 (register forms only): *213* --> *132*.
+// FMA*132*:
+// operands 1 and 2 (memory & register forms): *132* --> *231*;
+// operands 1 and 3 (register forms only): *132* --> *132*(no changes);
+// operands 2 and 3 (register forms only): *132* --> *213*.
+// FMA*231*:
+// operands 1 and 2 (memory & register forms): *231* --> *132*;
+// operands 1 and 3 (register forms only): *231* --> *213*;
+// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
+
+let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in
multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
- bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
SDPatternOperator Op = null_frag> {
- let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
+ let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -29,7 +47,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2,
VR128:$src1, VR128:$src3)))]>;
- let mayLoad = 1, isCommutable = IsMVariantCommutable in
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
@@ -37,7 +55,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
- let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
+ let usesCustomInserter = 1 in
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@@ -45,7 +63,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
VR256:$src3)))]>, VEX_L;
- let mayLoad = 1, isCommutable = IsMVariantCommutable in
+ let mayLoad = 1 in
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
@@ -54,34 +72,20 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
(OpVT256 (Op VR256:$src2, VR256:$src1,
(MemFrag256 addr:$src3))))]>, VEX_L;
}
-} // Constraints = "$src1 = $dst"
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
- // For 213, both the register and memory variant are commutable.
- // Indeed, the commutable operands are 1 and 2 and both live in registers
- // for both variants.
defm r213 : fma3p_rm<opc213,
!strconcat(OpcodeStr, "213", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 1,
- Op>;
-let hasSideEffects = 0 in {
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
defm r132 : fma3p_rm<opc132,
!strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
- // For 231, only the register variant is commutable.
- // For the memory variant the folded operand must be in 3. Thus,
- // in that case, it cannot be swapped with 2.
defm r231 : fma3p_rm<opc231,
!strconcat(OpcodeStr, "231", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 0>;
-} // hasSideEffects = 0
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
}
// Fused Multiply-Add
@@ -126,83 +130,122 @@ let ExeDomain = SSEPackedDouble in {
v4f64>, VEX_W;
}
-let Constraints = "$src1 = $dst" in {
-multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
- RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
- bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
+// All source register operands of FMA opcodes defined in the fma3s_rm
+// multiclass can be commuted. In many cases such a commute transformation
+// requires an opcode adjustment; for example, commuting operands 1 and 2 in
+// the FMA*132 form would require an opcode change to FMA*231:
+// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2;
+// -->
+// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2;
+// Please see the more detailed comment at the very beginning of the section
+// defining FMA3 opcodes above.
+let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
+multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode = null_frag> {
- let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
+ let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
+ [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>;
- let mayLoad = 1, isCommutable = IsMVariantCommutable in
+ let mayLoad = 1 in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src2, RC:$src1,
- (mem_frag addr:$src3))))]>;
+ (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>;
+}
+
+// These FMA*_Int instructions are defined specially for use when the scalar
+// FMA intrinsics are lowered to machine instructions; in that sense, they are
+// similar to the existing ADD*_Int, SUB*_Int, MUL*_Int, etc. instructions.
+//
+// All of the FMA*_Int opcodes are defined as commutable here.
+// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial
+// and the corresponding optimizations have been developed.
+// Commuting the 1st operand of FMA*_Int requires some additional analysis:
+// the commute optimization is legal only if all users of FMA*_Int use only
+// the lowest element of the FMA*_Int instruction. Even though such analysis
+// may not be implemented yet, we allow the routines doing the actual commute
+// transformation to decide whether one or another instruction is commutable.
+let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
+ hasSideEffects = 0 in
+multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
+ Operand memopr, RegisterClass RC> {
+ def r_Int : FMA3<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ []>;
+
+ let mayLoad = 1 in
+ def m_Int : FMA3<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, memopr:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ []>;
}
-} // Constraints = "$src1 = $dst"
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, string PackTy, string PT2, Intrinsic Int,
- SDNode OpNode, RegisterClass RC, ValueType OpVT,
- X86MemOperand x86memop, Operand memop, PatFrag mem_frag,
- ComplexPattern mem_cpat> {
-let hasSideEffects = 0 in {
- defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
- x86memop, RC, OpVT, mem_frag>;
- // See the other defm of r231 for the explanation regarding the
- // commutable flags.
- defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
- x86memop, RC, OpVT, mem_frag,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 0>;
+ string OpStr, string PackTy,
+ SDNode OpNode, RegisterClass RC,
+ X86MemOperand x86memop> {
+ defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy), x86memop, RC>;
+ defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy), x86memop, RC,
+ OpNode>;
+ defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy), x86memop, RC>;
}
-// See the other defm of r213 for the explanation regarding the
-// commutable flags.
-defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
- x86memop, RC, OpVT, mem_frag,
- /* IsRVariantCommutable */ 1,
- /* IsMVariantCommutable */ 1,
- OpNode>;
+// The FMA 213 form is created for lowering of scalar FMA intrinsics
+// to machine instructions.
+// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd
+// operands of the FMA 213 form.
+// The FMA 231 form can be obtained only by commuting the 1st operand of the
+// 213 or 132 forms, which is possible only after a special analysis of all
+// uses of the initial instruction. Such analysis does not exist yet, so the
+// 231 form of the FMA*_Int instructions is introduced under the optimistic
+// assumption that such analysis will be implemented eventually.
+multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpStr, string PackTy,
+ RegisterClass RC, Operand memop> {
+ defm r132 : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
+ memop, RC>;
+ defm r213 : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
+ memop, RC>;
+ defm r231 : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
+ memop, RC>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
SDNode OpNode> {
- defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", IntF32, OpNode,
- FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>;
- defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "PD", IntF64, OpNode,
- FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W;
+ let ExeDomain = SSEPackedSingle in
+ defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", OpNode,
+ FR32, f32mem>,
+ fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", VR128, ssmem>;
+
+ let ExeDomain = SSEPackedDouble in
+ defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", OpNode,
+ FR64, f64mem>,
+ fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", VR128, sdmem>,
+ VEX_W;
-// These patterns use the 123 ordering, instead of 213, even though
-// they match the intrinsic to the 213 version of the instruction.
-// This is because src1 is tied to dest, and the scalar intrinsics
-// require the pass-through values to come from the first source
-// operand, not the second.
+ // These patterns use the 123 ordering, instead of 213, even though
+ // they match the intrinsic to the 213 version of the instruction.
+ // This is because src1 is tied to dest, and the scalar intrinsics
+ // require the pass-through values to come from the first source
+ // operand, not the second.
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(NAME#"SSr213r")
- (COPY_TO_REGCLASS $src1, FR32),
- (COPY_TO_REGCLASS $src2, FR32),
- (COPY_TO_REGCLASS $src3, FR32)),
- VR128)>;
+ (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SSr213r_Int")
+ $src1, $src2, $src3), VR128)>;
def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
- (COPY_TO_REGCLASS
- (!cast<Instruction>(NAME#"SDr213r")
- (COPY_TO_REGCLASS $src1, FR64),
- (COPY_TO_REGCLASS $src2, FR64),
- (COPY_TO_REGCLASS $src3, FR64)),
- VR128)>;
+ (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SDr213r_Int")
+ $src1, $src2, $src3), VR128)>;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
@@ -334,36 +377,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
} // isCodeGenOnly = 1
}
-defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
- fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfmadd_ss>;
-defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
- fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfmadd_sd>;
-defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
- fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfmsub_ss>;
-defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
- fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfmsub_sd>;
-defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
- X86Fnmadd, loadf32>,
- fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
- int_x86_fma_vfnmadd_ss>;
-defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
- X86Fnmadd, loadf64>,
- fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmadd_sd>;
-defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
- X86Fnmsub, loadf32>,
- fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
- int_x86_fma_vfnmsub_ss>;
-defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
- X86Fnmsub, loadf64>,
- fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
- int_x86_fma_vfnmsub_sd>;
-
let ExeDomain = SSEPackedSingle in {
+ // Scalar Instructions
+ defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
+ fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfmadd_ss>;
+ defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
+ fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfmsub_ss>;
+ defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
+ X86Fnmadd, loadf32>,
+ fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmadd_ss>;
+ defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
+ X86Fnmsub, loadf32>,
+ fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma_vfnmsub_ss>;
+ // Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
@@ -379,6 +409,22 @@ let ExeDomain = SSEPackedSingle in {
}
let ExeDomain = SSEPackedDouble in {
+ // Scalar Instructions
+ defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
+ fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmadd_sd>;
+ defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
+ fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfmsub_sd>;
+ defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
+ X86Fnmadd, loadf64>,
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmadd_sd>;
+ defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
+ X86Fnmsub, loadf64>,
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma_vfnmsub_sd>;
+ // Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
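To make the commutation comments in the FMA3 section above concrete: the three-digit suffix encodes which source operands feed the multiply and which one is added, so swapping two sources generally requires picking the matching opcode. A small C++ sketch checking the *132* -> *231* mapping mentioned in the comments (helper names are illustrative):

    #include <cassert>

    // Scalar FMA3 semantics (operand 1 is also the destination):
    //   fma132(a, b, c) = a * c + b
    //   fma231(a, b, c) = b * c + a
    static double fma132(double a, double b, double c) { return a * c + b; }
    static double fma231(double a, double b, double c) { return b * c + a; }

    int main() {
      double a = 2.0, b = 3.0, c = 5.0;
      // Commuting operands 1 and 2 of the 132 form reproduces the 231 form,
      // which is why that commute needs the *132* -> *231* opcode change.
      assert(fma132(a, b, c) == fma231(b, a, c));
      return 0;
    }
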
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index 49068e9..03ae211 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -137,69 +137,99 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
// The FopST0 series are not included here because of the irregularities
// in where the 'r' goes in assembly output.
// These instructions cannot address 80-bit memory.
-multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
+multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
+ bit Forward = 1> {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP32:$dst,
- (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
+ [!if(Forward,
+ (set RFP32:$dst,
+ (OpNode RFP32:$src1, (loadf32 addr:$src2))),
+ (set RFP32:$dst,
+ (OpNode (loadf32 addr:$src2), RFP32:$src1)))]>;
def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
- (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (loadf64 addr:$src2))),
+ (set RFP64:$dst,
+ (OpNode (loadf64 addr:$src2), RFP64:$src1)))]>;
def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
- (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))),
+ (set RFP64:$dst,
+ (OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>;
def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
- (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))),
+ (set RFP80:$dst,
+ (OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>;
def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
- (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
+ (set RFP80:$dst,
+ (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
+let mayLoad = 1 in
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
- !strconcat("f", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("f", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
- !strconcat("f", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
- [(set RFP32:$dst, (OpNode RFP32:$src1,
- (X86fild addr:$src2, i16)))]>;
+ [!if(Forward,
+ (set RFP32:$dst,
+ (OpNode RFP32:$src1, (X86fild addr:$src2, i16))),
+ (set RFP32:$dst,
+ (OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>;
def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
- [(set RFP32:$dst, (OpNode RFP32:$src1,
- (X86fild addr:$src2, i32)))]>;
+ [!if(Forward,
+ (set RFP32:$dst,
+ (OpNode RFP32:$src1, (X86fild addr:$src2, i32))),
+ (set RFP32:$dst,
+ (OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>;
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
- [(set RFP64:$dst, (OpNode RFP64:$src1,
- (X86fild addr:$src2, i16)))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (X86fild addr:$src2, i16))),
+ (set RFP64:$dst,
+ (OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>;
def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
- [(set RFP64:$dst, (OpNode RFP64:$src1,
- (X86fild addr:$src2, i32)))]>;
+ [!if(Forward,
+ (set RFP64:$dst,
+ (OpNode RFP64:$src1, (X86fild addr:$src2, i32))),
+ (set RFP64:$dst,
+ (OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>;
def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
- OneArgFPRW,
- [(set RFP80:$dst, (OpNode RFP80:$src1,
- (X86fild addr:$src2, i16)))]>;
+ OneArgFPRW,
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (X86fild addr:$src2, i16))),
+ (set RFP80:$dst,
+ (OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>;
def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
- OneArgFPRW,
- [(set RFP80:$dst, (OpNode RFP80:$src1,
- (X86fild addr:$src2, i32)))]>;
+ OneArgFPRW,
+ [!if(Forward,
+ (set RFP80:$dst,
+ (OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
+ (set RFP80:$dst,
+ (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
+let mayLoad = 1 in
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
- !strconcat("fi", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("fi", asmstring, "{s}\t$src")>;
+let mayLoad = 1 in
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
- !strconcat("fi", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
-}
+ !strconcat("fi", asmstring, "{l}\t$src")>;
}
let Defs = [FPSW] in {
@@ -213,14 +243,14 @@ defm DIV : FPBinary_rr<fdiv>;
let SchedRW = [WriteFAddLd] in {
defm ADD : FPBinary<fadd, MRM0m, "add">;
defm SUB : FPBinary<fsub, MRM4m, "sub">;
-defm SUBR: FPBinary<fsub ,MRM5m, "subr">;
+defm SUBR: FPBinary<fsub ,MRM5m, "subr", 0>;
}
let SchedRW = [WriteFMulLd] in {
defm MUL : FPBinary<fmul, MRM1m, "mul">;
}
let SchedRW = [WriteFDivLd] in {
defm DIV : FPBinary<fdiv, MRM6m, "div">;
-defm DIVR: FPBinary<fdiv, MRM7m, "divr">;
+defm DIVR: FPBinary<fdiv, MRM7m, "divr", 0>;
}
}
@@ -306,13 +336,13 @@ def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">;
def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">;
-def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">;
+def FNSTSWm : FPI<0xDD, MRM7m, (outs i16mem:$dst), (ins), "fnstsw\t$dst">;
def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">;
def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">;
-def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
-def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
+def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\t$src">;
+def FBSTPm : FPI<0xDF, MRM6m, (outs f80mem:$dst), (ins), "fbstp\t$dst">;
// Floating point cmovs.
class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
@@ -633,16 +663,18 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>;
def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>;
def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
-def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
-def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
- IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
-def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
-def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
- IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
+let Predicates = [HasFXSR] in {
+ def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
+ def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
+ "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
+ IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
+ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
+ def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
+ "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
+ IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
+} // Predicates = [HasFXSR]
} // SchedRW
//===----------------------------------------------------------------------===//
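The new Forward bit in FPBinary above models the reversed x87 memory forms: for fsubr and fdivr the memory operand is the minuend/dividend, so the selection pattern has to swap the operands of the generic fsub/fdiv node. A short C++ sketch of the intended semantics (function names are illustrative):

    // Forward = 1 (fsub m32):  ST(0) = ST(0) - mem
    static double fsub_fwd(double st0, double mem) { return st0 - mem; }
    // Forward = 0 (fsubr m32): ST(0) = mem - ST(0)
    static double fsubr_rev(double st0, double mem) { return mem - st0; }
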
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 1f61ffa..829cedd 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -38,6 +38,8 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
SDTCisVec<1>]>;
+def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>, SDTCisInt<3>]>;
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@@ -58,13 +60,17 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>;
+def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
@@ -74,11 +80,18 @@ def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",
SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
SDTCisVT<1, v4i32>]>>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86psadbw : SDNode<"X86ISD::PSADBW",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
+ SDTCVecEltisVT<1, i8>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
+def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, i8>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisInt<3>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -86,9 +99,11 @@ def X86psign : SDNode<"X86ISD::PSIGN",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>,
+ SDTCisPtrTy<2>]>>;
def X86pextrw : SDNode<"X86ISD::PEXTRW",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>,
+ SDTCisPtrTy<2>]>>;
def X86pinsrb : SDNode<"X86ISD::PINSRB",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
@@ -114,19 +129,17 @@ def X86vsext : SDNode<"X86ISD::VSEXT",
SDTCisInt<0>, SDTCisInt<1>,
SDTCisOpSmallerThanOp<1, 0>]>>;
-def X86vtrunc : SDNode<"X86ISD::VTRUNC",
- SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisOpSmallerThanOp<0, 1>]>>;
+def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisOpSmallerThanOp<0, 1>]>;
+
+def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>;
+def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>;
+def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>;
+
def X86trunc : SDNode<"X86ISD::TRUNC",
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
-
-def X86vtruncm : SDNode<"X86ISD::VTRUNCM",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVec<2>, SDTCisInt<2>,
- SDTCisOpSmallerThanOp<0, 2>]>>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
@@ -136,6 +149,35 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTCisFP<0>, SDTCisFP<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
+def X86fround: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCVecEltisVT<2, f64>,
+ SDTCisOpSmallerThanOp<0, 1>]>>;
+def X86froundRnd: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCVecEltisVT<2, f64>,
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisInt<3>]>>;
+
+def X86fpext : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCVecEltisVT<2, f32>,
+ SDTCisOpSmallerThanOp<1, 0>]>>;
+
+def X86fpextRnd : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>,
+ SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCVecEltisVT<2, f32>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisInt<3>]>>;
+
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
@@ -159,10 +201,15 @@ def X86CmpMaskCCRound :
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
-def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
-def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
-def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
-def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
+def X86CmpMaskCCScalarRound :
+ SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>,
+ SDTCisInt<4>]>;
+
+def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
+def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
+def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
+def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
+def X86cmpmsRnd : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalarRound>;
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -178,6 +225,29 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+def X86vprot : SDNode<"X86ISD::VPROT",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86vproti : SDNode<"X86ISD::VPROTI",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i8>]>>;
+
+def X86vpshl : SDNode<"X86ISD::VPSHL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86vpsha : SDNode<"X86ISD::VPSHA",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+
+def X86vpcom : SDNode<"X86ISD::VPCOM",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisVT<3, i8>]>>;
+def X86vpcomu : SDNode<"X86ISD::VPCOMU",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisVT<3, i8>]>>;
+
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
@@ -190,6 +260,7 @@ def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
+def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCVecEltisVT<0, i1>,
@@ -201,11 +272,15 @@ def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>,
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<1,2>]>>;
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
+ SDTCVecEltisVT<1, i32>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
def X86pmuldq : SDNode<"X86ISD::PMULDQ",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
- SDTCisSameAs<1,2>]>>;
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
+ SDTCVecEltisVT<1, i32>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
def X86extrqi : SDNode<"X86ISD::EXTRQI",
SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
@@ -221,24 +296,30 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI",
def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
-def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisVec<2>]>;
+ SDTCisSameSizeAs<0,2>,
+ SDTCisSameNumEltsAs<0,2>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
- SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+ SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>;
+def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisInt<2>, SDTCisInt<3>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
-def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>;
+def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
+ SDTCisInt<0>, SDTCisInt<1>]>;
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
+def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>,
+ SDTCisVT<4, i8>]>;
+
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
@@ -250,15 +331,17 @@ def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>;
def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>,
- SDTCisVec<0>, SDTCisInt<2>]>;
+ SDTCisVec<0>, SDTCisVT<2, i32>]>;
def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
- SDTCisVec<0>, SDTCisInt<3>]>;
+ SDTCisVec<0>, SDTCisVT<3, i32>]>;
def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
- SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>;
+ SDTCisVec<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
-def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
+
+def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
+def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
@@ -281,33 +364,74 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>;
+def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>;
def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
+def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>;
+def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>;
+
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
-def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
+def X86VPermv : SDNode<"X86ISD::VPERMV",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>,
+ SDTCisSameNumEltsAs<0,1>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
-def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
-def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
+def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>,
+ SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>,
+ SDTCisSameSizeAs<0,2>,
+ SDTCisSameAs<0,3>]>, []>;
+
+def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
+ SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>]>, []>;
+
+def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
-def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
-def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
+def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>;
+def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>;
+def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
+def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
+def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>;
+def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
+ SDTCisVec<1>, SDTCisFP<1>,
+ SDTCisSameNumEltsAs<0,1>,
+ SDTCisVT<2, i32>]>, []>;
+def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i1>,
+ SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>;
def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSubVecOfVec<1, 0>]>, []>;
+// The SDTCisSubVecOfVec restriction cannot be applied to the 128-bit version of VBROADCASTI32x2.
+def X86SubV32x2Broadcast : SDNode<"X86ISD::SUBV_BROADCAST",
+ SDTypeProfile<1, 1, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>]>, []>;
+
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>;
+ [SDTCisSameAs<0, 1>, SDTCisEltOfVec<2, 1>,
+ SDTCisPtrTy<3>]>, []>;
def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
- [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>;
+ [SDTCisEltOfVec<0, 1>, SDTCisVec<1>,
+ SDTCisPtrTy<2>]>, []>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
@@ -317,11 +441,13 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
-def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
-def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
-def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
-def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
-def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
+def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", STDFp2SrcRm>;
+def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
+def X86fgetexpRnds : SDNode<"X86ISD::FGETEXP_RND", STDFp2SrcRm>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
@@ -341,9 +467,11 @@ def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
-def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
-def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
-def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>;
+def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>;
+def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>;
+def X86RndScales : SDNode<"X86ISD::VRNDSCALE", STDFp3SrcRm>;
+def X86Reduces : SDNode<"X86ISD::VREDUCE", STDFp3SrcRm>;
+def X86GetMants : SDNode<"X86ISD::VGETMANT", STDFp3SrcRm>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -362,7 +490,8 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
- SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
+ SDTCisSameAs<0,1>, SDTCisInt<2>,
+ SDTCisVT<3, i32>]>;
def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
@@ -371,9 +500,12 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
+def SDTSDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisFP<1>,
+ SDTCVecEltisVT<1, f64>, SDTCisInt<2>]>;
def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
-
+def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
+ SDTCVecEltisVT<1, f32>, SDTCisInt<2>]>;
def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCVecEltisVT<1, i32>,
SDTCisInt<2>]>;
@@ -392,6 +524,10 @@ def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+def X86cvttss2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSFloatToIntRnd>;
+def X86cvttss2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSFloatToIntRnd>;
+def X86cvttsd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSDoubleToIntRnd>;
+def X86cvttsd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSDoubleToIntRnd>;
// Vector with rounding mode
// cvtt fp-to-int stuff
@@ -417,17 +553,35 @@ def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>;
def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>;
def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>;
+def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, i16>,
+ SDTCisFP<0>,
+ SDTCisVT<2, i32>]> >;
+
+def X86cvtps2ph : SDNode<"ISD::FP_TO_FP16",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisFP<1>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]> >;
def X86vfpextRnd : SDNode<"X86ISD::VFPEXT",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
+ SDTCVecEltisVT<0, f64>,
+ SDTCVecEltisVT<1, f32>,
SDTCisOpSmallerThanOp<1, 0>,
- SDTCisInt<2>]>>;
+ SDTCisVT<2, i32>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisFP<1>,
SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
- SDTCisInt<2>]>>;
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisVT<2, i32>]>>;
+
+def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -436,10 +590,10 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
// These are 'extloads' from a scalar to the low element of a vector, zeroing
// the top elements. These are used for the SSE 'ss' and 'sd' instruction
// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+def sse_load_f32 : ComplexPattern<v4f32, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
SDNPWantRoot]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+def sse_load_f64 : ComplexPattern<v2f64, 5, "selectScalarSSELoad", [],
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
SDNPWantRoot]>;
@@ -490,9 +644,9 @@ def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
// The memory operand is required to be a 128-bit load, so it must be converted
// from a vector to a scalar.
def loadf32_128 : PatFrag<(ops node:$ptr),
- (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>;
+ (f32 (extractelt (loadv4f32 node:$ptr), (iPTR 0)))>;
def loadf64_128 : PatFrag<(ops node:$ptr),
- (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>;
+ (f64 (extractelt (loadv2f64 node:$ptr), (iPTR 0)))>;
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -590,9 +744,9 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
// The memory operand is required to be a 128-bit load, so it must be converted
// from a vector to a scalar.
def memopfsf32_128 : PatFrag<(ops node:$ptr),
- (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>;
+ (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>;
def memopfsf64_128 : PatFrag<(ops node:$ptr),
- (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>;
+ (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
@@ -604,32 +758,6 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;
-// MOVNT Support
-// Like 'store', but requires the non-temporal bit to be set
-def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal();
- return false;
-}]>;
-
-def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() && !ST->isTruncatingStore() &&
- ST->getAddressingMode() == ISD::UNINDEXED &&
- ST->getAlignment() >= 16;
- return false;
-}]>;
-
-def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() &&
- ST->getAlignment() < 16;
- return false;
-}]>;
-
def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_gather node:$src1, node:$src2, node:$src3) , [{
if (MaskedGatherSDNode *Mgt = dyn_cast<MaskedGatherSDNode>(N))
@@ -851,29 +979,59 @@ def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return isa<MaskedLoadSDNode>(N);
}]>;
+// masked store fragments.
+// X86mstore can't be implemented in core DAG files because some targets
+// don't support vector types (llvm-tblgen will fail)
+def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+
def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
return Store->getAlignment() >= 16;
return false;
}]>;
def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
return Store->getAlignment() >= 32;
return false;
}]>;
def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
return Store->getAlignment() >= 64;
return false;
}]>;
def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_store node:$src1, node:$src2, node:$src3), [{
+ (X86mstore node:$src1, node:$src2, node:$src3), [{
return isa<MaskedStoreSDNode>(N);
}]>;
+// masked truncstore fragments
+// X86mtruncstore can't be implemented in core DAG files because some targets
+// don't support vector types (llvm-tblgen will fail)
+def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+}]>;
+def masked_truncstorevi8 :
+ PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def masked_truncstorevi16 :
+ PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def masked_truncstorevi32 :
+ PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86mtruncstore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
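
The fragments above split masked stores along two axes: truncating vs. non-truncating, and the scalar type written to memory. Below is a minimal standalone C++ sketch of that dispatch, using hypothetical stand-in types rather than the real MaskedStoreSDNode/MVT API:

    #include <cassert>

    // Hypothetical stand-ins for the SelectionDAG node queries used above.
    enum class ScalarTy { i8, i16, i32, i64 };
    struct MaskedStore {
      bool Truncating;
      ScalarTy MemScalarTy;
    };

    // Mirrors X86mstore: only non-truncating masked stores match.
    bool matchPlainMaskedStore(const MaskedStore &S) { return !S.Truncating; }

    // Mirrors masked_truncstorevi8/vi16/vi32: a truncating masked store of a
    // given memory scalar type.
    bool matchTruncStore(const MaskedStore &S, ScalarTy Ty) {
      return S.Truncating && S.MemScalarTy == Ty;
    }

    int main() {
      MaskedStore Plain{false, ScalarTy::i32};
      MaskedStore TruncTo8{true, ScalarTy::i8};
      assert(matchPlainMaskedStore(Plain) && !matchPlainMaskedStore(TruncTo8));
      assert(matchTruncStore(TruncTo8, ScalarTy::i8));
      assert(!matchTruncStore(TruncTo8, ScalarTy::i16));
      return 0;
    }
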
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index cf68ef0..63e78de 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
@@ -101,9 +102,11 @@ struct X86MemoryFoldTableEntry {
void X86InstrInfo::anchor() {}
X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
- : X86GenInstrInfo(
- (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
- (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+ : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64
+ : X86::ADJCALLSTACKDOWN32),
+ (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
+ : X86::ADJCALLSTACKUP32),
+ X86::CATCHRET),
Subtarget(STI), RI(STI.getTargetTriple()) {
static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
@@ -332,6 +335,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
{ X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
{ X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
+ { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
+ { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
+ { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
{ X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE },
{ X86::SETAr, X86::SETAm, TB_FOLDED_STORE },
{ X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE },
@@ -495,7 +501,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
{ X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
@@ -605,7 +610,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
{ X86::VPABSBrr128, X86::VPABSBrm128, 0 },
{ X86::VPABSDrr128, X86::VPABSDrm128, 0 },
@@ -1647,6 +1651,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PEXT32rr, X86::PEXT32rm, 0 },
{ X86::PEXT64rr, X86::PEXT64rm, 0 },
+ // ADX foldable instructions
+ { X86::ADCX32rr, X86::ADCX32rm, 0 },
+ { X86::ADCX64rr, X86::ADCX64rm, 0 },
+ { X86::ADOX32rr, X86::ADOX32rm, 0 },
+ { X86::ADOX64rr, X86::ADOX64rm, 0 },
+
// AVX-512 foldable instructions
{ X86::VADDPSZrr, X86::VADDPSZrm, 0 },
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
@@ -1729,11 +1739,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
// FMA foldable instructions
{ X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr231r_Int, X86::VFMADDSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr231r_Int, X86::VFMADDSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE },
+ { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE },
+ { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE },
{ X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE },
@@ -1749,11 +1765,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE },
{ X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr231r_Int, X86::VFNMADDSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr231r_Int, X86::VFNMADDSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE },
+ { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE },
+ { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE },
{ X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE },
@@ -1769,11 +1791,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE },
{ X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr231r_Int, X86::VFMSUBSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr231r_Int, X86::VFMSUBSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE },
+ { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE },
{ X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE },
@@ -1789,11 +1817,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE },
{ X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr231r_Int, X86::VFNMSUBSSr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr231r_Int, X86::VFNMSUBSDr231m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE },
+ { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, TB_ALIGN_NONE },
{ X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE },
{ X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE },
@@ -2282,7 +2316,35 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::FsVMOVAPSrm:
case X86::FsVMOVAPDrm:
case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm: {
+ case X86::FsMOVAPDrm:
+ // AVX-512
+ case X86::VMOVAPDZ128rm:
+ case X86::VMOVAPDZ256rm:
+ case X86::VMOVAPDZrm:
+ case X86::VMOVAPSZ128rm:
+ case X86::VMOVAPSZ256rm:
+ case X86::VMOVAPSZrm:
+ case X86::VMOVDQA32Z128rm:
+ case X86::VMOVDQA32Z256rm:
+ case X86::VMOVDQA32Zrm:
+ case X86::VMOVDQA64Z128rm:
+ case X86::VMOVDQA64Z256rm:
+ case X86::VMOVDQA64Zrm:
+ case X86::VMOVDQU16Z128rm:
+ case X86::VMOVDQU16Z256rm:
+ case X86::VMOVDQU16Zrm:
+ case X86::VMOVDQU32Z128rm:
+ case X86::VMOVDQU32Z256rm:
+ case X86::VMOVDQU32Zrm:
+ case X86::VMOVDQU64Z128rm:
+ case X86::VMOVDQU64Z256rm:
+ case X86::VMOVDQU64Zrm:
+ case X86::VMOVDQU8Z128rm:
+ case X86::VMOVDQU8Z256rm:
+ case X86::VMOVDQU8Zrm:
+ case X86::VMOVUPSZ128rm:
+ case X86::VMOVUPSZ256rm:
+ case X86::VMOVUPSZrm: {
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1+X86::AddrBaseReg).isReg() &&
MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
@@ -2363,9 +2425,8 @@ bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
   // It is safe to clobber EFLAGS at the end of a block if no successor has it
// live in.
if (Iter == E) {
- for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *S : MBB.successors())
+ if (S->isLiveIn(X86::EFLAGS))
return false;
return true;
}
@@ -2411,13 +2472,29 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo &TRI) const {
- // MOV32r0 is implemented with a xor which clobbers condition code.
- // Re-materialize it as movri instructions to avoid side effects.
- unsigned Opc = Orig->getOpcode();
- if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
+ bool ClobbersEFLAGS = false;
+ for (const MachineOperand &MO : Orig->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
+ ClobbersEFLAGS = true;
+ break;
+ }
+ }
+
+ if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
+ // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
+ // effects.
+ int Value;
+ switch (Orig->getOpcode()) {
+ case X86::MOV32r0: Value = 0; break;
+ case X86::MOV32r1: Value = 1; break;
+ case X86::MOV32r_1: Value = -1; break;
+ default:
+ llvm_unreachable("Unexpected instruction!");
+ }
+
DebugLoc DL = Orig->getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
- .addImm(0);
+ .addImm(Value);
} else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MBB.insert(I, MI);
@@ -2428,7 +2505,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
}
/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
-static bool hasLiveCondCodeDef(MachineInstr *MI) {
+bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr *MI) const {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() &&
@@ -2453,7 +2530,7 @@ inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
// Left shift instructions can be transformed into load-effective-address
// instructions if we can encode them appropriately.
- // A LEA instruction utilizes a SIB byte to encode it's scale factor.
+ // A LEA instruction utilizes a SIB byte to encode its scale factor.
// The SIB.scale field is two bits wide which means that we can encode any
// shift amount less than 4.
return ShAmt < 4 && ShAmt > 0;
@@ -2493,7 +2570,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src,
ImplicitOp = Src;
ImplicitOp.setImplicit();
- NewSrc = getX86SubSuperRegister(Src.getReg(), MVT::i64);
+ NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
MachineBasicBlock::LivenessQueryResult LQR =
MI->getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI);
@@ -2914,10 +2991,162 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMI;
}
-/// We have a few instructions that must be hacked on to commute them.
-///
-MachineInstr *
-X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+/// Returns true if the given instruction opcode is FMA3.
+/// Otherwise, returns false.
+/// The second parameter is optional and serves as a second return value:
+/// it is set to true if the given opcode is an FMA3 opcode used for
+/// lowering scalar FMA intrinsics, and to false otherwise.
+static bool isFMA3(unsigned Opcode, bool *IsIntrinsic = nullptr) {
+ if (IsIntrinsic)
+ *IsIntrinsic = false;
+
+ switch (Opcode) {
+ case X86::VFMADDSDr132r: case X86::VFMADDSDr132m:
+ case X86::VFMADDSSr132r: case X86::VFMADDSSr132m:
+ case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m:
+ case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m:
+ case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m:
+ case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m:
+ case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m:
+ case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m:
+
+ case X86::VFMADDSDr213r: case X86::VFMADDSDr213m:
+ case X86::VFMADDSSr213r: case X86::VFMADDSSr213m:
+ case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m:
+ case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m:
+ case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m:
+ case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m:
+ case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m:
+ case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m:
+
+ case X86::VFMADDSDr231r: case X86::VFMADDSDr231m:
+ case X86::VFMADDSSr231r: case X86::VFMADDSSr231m:
+ case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m:
+ case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m:
+ case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m:
+ case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m:
+ case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m:
+ case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m:
+
+ case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m:
+ case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m:
+ case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m:
+ case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m:
+ case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY:
+ case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY:
+ case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY:
+ case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY:
+
+ case X86::VFMADDPDr132r: case X86::VFMADDPDr132m:
+ case X86::VFMADDPSr132r: case X86::VFMADDPSr132m:
+ case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m:
+ case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m:
+ case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m:
+ case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m:
+ case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m:
+ case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m:
+ case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY:
+ case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY:
+ case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY:
+ case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY:
+ case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY:
+ case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY:
+ case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY:
+ case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY:
+
+ case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m:
+ case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m:
+ case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m:
+ case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m:
+ case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY:
+ case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY:
+ case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY:
+ case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY:
+
+ case X86::VFMADDPDr213r: case X86::VFMADDPDr213m:
+ case X86::VFMADDPSr213r: case X86::VFMADDPSr213m:
+ case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m:
+ case X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m:
+ case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m:
+ case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m:
+ case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m:
+ case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m:
+ case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY:
+ case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY:
+ case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY:
+ case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY:
+ case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY:
+ case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY:
+ case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY:
+ case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY:
+
+ case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m:
+ case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m:
+ case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m:
+ case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m:
+ case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY:
+ case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY:
+ case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY:
+ case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY:
+
+ case X86::VFMADDPDr231r: case X86::VFMADDPDr231m:
+ case X86::VFMADDPSr231r: case X86::VFMADDPSr231m:
+ case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m:
+ case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m:
+ case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m:
+ case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m:
+ case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m:
+ case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m:
+ case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY:
+ case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY:
+ case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY:
+ case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY:
+ case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY:
+ case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY:
+ case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY:
+ case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY:
+ return true;
+
+ case X86::VFMADDSDr132r_Int: case X86::VFMADDSDr132m_Int:
+ case X86::VFMADDSSr132r_Int: case X86::VFMADDSSr132m_Int:
+ case X86::VFMSUBSDr132r_Int: case X86::VFMSUBSDr132m_Int:
+ case X86::VFMSUBSSr132r_Int: case X86::VFMSUBSSr132m_Int:
+ case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int:
+ case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int:
+ case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int:
+ case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int:
+
+ case X86::VFMADDSDr213r_Int: case X86::VFMADDSDr213m_Int:
+ case X86::VFMADDSSr213r_Int: case X86::VFMADDSSr213m_Int:
+ case X86::VFMSUBSDr213r_Int: case X86::VFMSUBSDr213m_Int:
+ case X86::VFMSUBSSr213r_Int: case X86::VFMSUBSSr213m_Int:
+ case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int:
+ case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int:
+ case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int:
+ case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int:
+
+ case X86::VFMADDSDr231r_Int: case X86::VFMADDSDr231m_Int:
+ case X86::VFMADDSSr231r_Int: case X86::VFMADDSSr231m_Int:
+ case X86::VFMSUBSDr231r_Int: case X86::VFMSUBSDr231m_Int:
+ case X86::VFMSUBSSr231r_Int: case X86::VFMSUBSSr231m_Int:
+ case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int:
+ case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int:
+ case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int:
+ case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int:
+ if (IsIntrinsic)
+ *IsIntrinsic = true;
+ return true;
+ default:
+ return false;
+ }
+ llvm_unreachable("Opcode not handled by the switch");
+}
+
+MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr *MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
switch (MI->getOpcode()) {
case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
@@ -2944,7 +3173,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
}
MI->setDesc(get(Opc));
MI->getOperand(3).setImm(Size-Amt);
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::BLENDPDrri:
case X86::BLENDPSrri:
@@ -2980,7 +3209,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
NewMI = false;
}
MI->getOperand(3).setImm(Mask ^ Imm);
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::PCLMULQDQrr:
case X86::VPCLMULQDQrr:{
@@ -2995,7 +3224,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
NewMI = false;
}
MI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4));
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::CMPPDrri:
case X86::CMPPSrri:
@@ -3016,7 +3245,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
MI = MF.CloneMachineInstr(MI);
NewMI = false;
}
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
default:
return nullptr;
}
@@ -3045,7 +3274,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
NewMI = false;
}
MI->getOperand(3).setImm(Imm);
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
@@ -3124,11 +3353,272 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Fallthrough intended.
}
default:
- return TargetInstrInfo::commuteInstruction(MI, NewMI);
+ if (isFMA3(MI->getOpcode())) {
+ unsigned Opc = getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2);
+ if (Opc == 0)
+ return nullptr;
+ if (NewMI) {
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MI = MF.CloneMachineInstr(MI);
+ NewMI = false;
+ }
+ MI->setDesc(get(Opc));
+ }
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ }
+}
+
+bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+
+ unsigned RegOpsNum = isMem(MI, 3) ? 2 : 3;
+
+ // Only the first RegOpsNum operands are commutable.
+ // Also, the value 'CommuteAnyOperandIndex' is valid here as it means
+ // that the operand is not specified/fixed.
+ if (SrcOpIdx1 != CommuteAnyOperandIndex &&
+ (SrcOpIdx1 < 1 || SrcOpIdx1 > RegOpsNum))
+ return false;
+ if (SrcOpIdx2 != CommuteAnyOperandIndex &&
+ (SrcOpIdx2 < 1 || SrcOpIdx2 > RegOpsNum))
+ return false;
+
+ // Look for two different register operands assumed to be commutable
+ // regardless of the FMA opcode. The FMA opcode is adjusted later.
+ if (SrcOpIdx1 == CommuteAnyOperandIndex ||
+ SrcOpIdx2 == CommuteAnyOperandIndex) {
+ unsigned CommutableOpIdx1 = SrcOpIdx1;
+ unsigned CommutableOpIdx2 = SrcOpIdx2;
+
+    // At least one of the operands to be commuted is not specified, and
+    // this method is free to choose appropriate commutable operands.
+ if (SrcOpIdx1 == SrcOpIdx2)
+      // Neither operand is fixed. By default, set one of the commutable
+      // operands to the last register operand of the instruction.
+ CommutableOpIdx2 = RegOpsNum;
+ else if (SrcOpIdx2 == CommuteAnyOperandIndex)
+      // Only one of the operands is not fixed.
+ CommutableOpIdx2 = SrcOpIdx1;
+
+ // CommutableOpIdx2 is well defined now. Let's choose another commutable
+ // operand and assign its index to CommutableOpIdx1.
+ unsigned Op2Reg = MI->getOperand(CommutableOpIdx2).getReg();
+ for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) {
+ // The commuted operands must have different registers.
+      // Otherwise, the commute transformation does not change anything and
+      // is therefore useless.
+ if (Op2Reg != MI->getOperand(CommutableOpIdx1).getReg())
+ break;
+ }
+
+ // No appropriate commutable operands were found.
+ if (CommutableOpIdx1 == 0)
+ return false;
+
+    // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2
+ // to return those values.
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+ }
+
+ // Check if we can adjust the opcode to preserve the semantics when
+  // commuting the register operands.
+ return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2) != 0;
+}
+
+unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
+ unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2) const {
+ unsigned Opc = MI->getOpcode();
+
+  // Define the array that holds FMA opcodes in groups of three:
+  // the 132, 213 and 231 forms of each operation.
+ static const unsigned RegularOpcodeGroups[][3] = {
+ { X86::VFMADDSSr132r, X86::VFMADDSSr213r, X86::VFMADDSSr231r },
+ { X86::VFMADDSDr132r, X86::VFMADDSDr213r, X86::VFMADDSDr231r },
+ { X86::VFMADDPSr132r, X86::VFMADDPSr213r, X86::VFMADDPSr231r },
+ { X86::VFMADDPDr132r, X86::VFMADDPDr213r, X86::VFMADDPDr231r },
+ { X86::VFMADDPSr132rY, X86::VFMADDPSr213rY, X86::VFMADDPSr231rY },
+ { X86::VFMADDPDr132rY, X86::VFMADDPDr213rY, X86::VFMADDPDr231rY },
+ { X86::VFMADDSSr132m, X86::VFMADDSSr213m, X86::VFMADDSSr231m },
+ { X86::VFMADDSDr132m, X86::VFMADDSDr213m, X86::VFMADDSDr231m },
+ { X86::VFMADDPSr132m, X86::VFMADDPSr213m, X86::VFMADDPSr231m },
+ { X86::VFMADDPDr132m, X86::VFMADDPDr213m, X86::VFMADDPDr231m },
+ { X86::VFMADDPSr132mY, X86::VFMADDPSr213mY, X86::VFMADDPSr231mY },
+ { X86::VFMADDPDr132mY, X86::VFMADDPDr213mY, X86::VFMADDPDr231mY },
+
+ { X86::VFMSUBSSr132r, X86::VFMSUBSSr213r, X86::VFMSUBSSr231r },
+ { X86::VFMSUBSDr132r, X86::VFMSUBSDr213r, X86::VFMSUBSDr231r },
+ { X86::VFMSUBPSr132r, X86::VFMSUBPSr213r, X86::VFMSUBPSr231r },
+ { X86::VFMSUBPDr132r, X86::VFMSUBPDr213r, X86::VFMSUBPDr231r },
+ { X86::VFMSUBPSr132rY, X86::VFMSUBPSr213rY, X86::VFMSUBPSr231rY },
+ { X86::VFMSUBPDr132rY, X86::VFMSUBPDr213rY, X86::VFMSUBPDr231rY },
+ { X86::VFMSUBSSr132m, X86::VFMSUBSSr213m, X86::VFMSUBSSr231m },
+ { X86::VFMSUBSDr132m, X86::VFMSUBSDr213m, X86::VFMSUBSDr231m },
+ { X86::VFMSUBPSr132m, X86::VFMSUBPSr213m, X86::VFMSUBPSr231m },
+ { X86::VFMSUBPDr132m, X86::VFMSUBPDr213m, X86::VFMSUBPDr231m },
+ { X86::VFMSUBPSr132mY, X86::VFMSUBPSr213mY, X86::VFMSUBPSr231mY },
+ { X86::VFMSUBPDr132mY, X86::VFMSUBPDr213mY, X86::VFMSUBPDr231mY },
+
+ { X86::VFNMADDSSr132r, X86::VFNMADDSSr213r, X86::VFNMADDSSr231r },
+ { X86::VFNMADDSDr132r, X86::VFNMADDSDr213r, X86::VFNMADDSDr231r },
+ { X86::VFNMADDPSr132r, X86::VFNMADDPSr213r, X86::VFNMADDPSr231r },
+ { X86::VFNMADDPDr132r, X86::VFNMADDPDr213r, X86::VFNMADDPDr231r },
+ { X86::VFNMADDPSr132rY, X86::VFNMADDPSr213rY, X86::VFNMADDPSr231rY },
+ { X86::VFNMADDPDr132rY, X86::VFNMADDPDr213rY, X86::VFNMADDPDr231rY },
+ { X86::VFNMADDSSr132m, X86::VFNMADDSSr213m, X86::VFNMADDSSr231m },
+ { X86::VFNMADDSDr132m, X86::VFNMADDSDr213m, X86::VFNMADDSDr231m },
+ { X86::VFNMADDPSr132m, X86::VFNMADDPSr213m, X86::VFNMADDPSr231m },
+ { X86::VFNMADDPDr132m, X86::VFNMADDPDr213m, X86::VFNMADDPDr231m },
+ { X86::VFNMADDPSr132mY, X86::VFNMADDPSr213mY, X86::VFNMADDPSr231mY },
+ { X86::VFNMADDPDr132mY, X86::VFNMADDPDr213mY, X86::VFNMADDPDr231mY },
+
+ { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr213r, X86::VFNMSUBSSr231r },
+ { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr213r, X86::VFNMSUBSDr231r },
+ { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr213r, X86::VFNMSUBPSr231r },
+ { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr213r, X86::VFNMSUBPDr231r },
+ { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr231rY },
+ { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr231rY },
+ { X86::VFNMSUBSSr132m, X86::VFNMSUBSSr213m, X86::VFNMSUBSSr231m },
+ { X86::VFNMSUBSDr132m, X86::VFNMSUBSDr213m, X86::VFNMSUBSDr231m },
+ { X86::VFNMSUBPSr132m, X86::VFNMSUBPSr213m, X86::VFNMSUBPSr231m },
+ { X86::VFNMSUBPDr132m, X86::VFNMSUBPDr213m, X86::VFNMSUBPDr231m },
+ { X86::VFNMSUBPSr132mY, X86::VFNMSUBPSr213mY, X86::VFNMSUBPSr231mY },
+ { X86::VFNMSUBPDr132mY, X86::VFNMSUBPDr213mY, X86::VFNMSUBPDr231mY },
+
+ { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr231r },
+ { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr231r },
+ { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr231rY },
+ { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr231rY },
+ { X86::VFMADDSUBPSr132m, X86::VFMADDSUBPSr213m, X86::VFMADDSUBPSr231m },
+ { X86::VFMADDSUBPDr132m, X86::VFMADDSUBPDr213m, X86::VFMADDSUBPDr231m },
+ { X86::VFMADDSUBPSr132mY, X86::VFMADDSUBPSr213mY, X86::VFMADDSUBPSr231mY },
+ { X86::VFMADDSUBPDr132mY, X86::VFMADDSUBPDr213mY, X86::VFMADDSUBPDr231mY },
+
+ { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr231r },
+ { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr231r },
+ { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr231rY },
+ { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr231rY },
+ { X86::VFMSUBADDPSr132m, X86::VFMSUBADDPSr213m, X86::VFMSUBADDPSr231m },
+ { X86::VFMSUBADDPDr132m, X86::VFMSUBADDPDr213m, X86::VFMSUBADDPDr231m },
+ { X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY },
+ { X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY }
+ };
+
+  // Define the array that holds the FMA*_Int opcodes in groups of three:
+  // the 132, 213 and 231 forms of each operation.
+ static const unsigned IntrinOpcodeGroups[][3] = {
+ { X86::VFMADDSSr132r_Int, X86::VFMADDSSr213r_Int, X86::VFMADDSSr231r_Int },
+ { X86::VFMADDSDr132r_Int, X86::VFMADDSDr213r_Int, X86::VFMADDSDr231r_Int },
+ { X86::VFMADDSSr132m_Int, X86::VFMADDSSr213m_Int, X86::VFMADDSSr231m_Int },
+ { X86::VFMADDSDr132m_Int, X86::VFMADDSDr213m_Int, X86::VFMADDSDr231m_Int },
+
+ { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr231r_Int },
+ { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr231r_Int },
+ { X86::VFMSUBSSr132m_Int, X86::VFMSUBSSr213m_Int, X86::VFMSUBSSr231m_Int },
+ { X86::VFMSUBSDr132m_Int, X86::VFMSUBSDr213m_Int, X86::VFMSUBSDr231m_Int },
+
+ { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int },
+ { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int },
+ { X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int },
+ { X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int },
+
+ { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int },
+ { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int },
+ { X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int },
+ { X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int },
+ };
+
+ const unsigned Form132Index = 0;
+ const unsigned Form213Index = 1;
+ const unsigned Form231Index = 2;
+ const unsigned FormsNum = 3;
+
+ bool IsIntrinOpcode;
+ isFMA3(Opc, &IsIntrinOpcode);
+
+ size_t GroupsNum;
+ const unsigned (*OpcodeGroups)[3];
+ if (IsIntrinOpcode) {
+ GroupsNum = array_lengthof(IntrinOpcodeGroups);
+ OpcodeGroups = IntrinOpcodeGroups;
+ } else {
+ GroupsNum = array_lengthof(RegularOpcodeGroups);
+ OpcodeGroups = RegularOpcodeGroups;
+ }
+
+ const unsigned *FoundOpcodesGroup = nullptr;
+ size_t FormIndex;
+
+ // Look for the input opcode in the corresponding opcodes table.
+ for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup;
+ ++GroupIndex) {
+ for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) {
+ if (OpcodeGroups[GroupIndex][FormIndex] == Opc) {
+ FoundOpcodesGroup = OpcodeGroups[GroupIndex];
+ break;
+ }
+ }
}
+
+  // The input opcode does not match any of the opcodes from the tables.
+ // The unsupported FMA opcode must be added to one of the two opcode groups
+ // defined above.
+ assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode");
+
+  // Put the lowest index in SrcOpIdx1 to simplify the checks below.
+ if (SrcOpIdx1 > SrcOpIdx2)
+ std::swap(SrcOpIdx1, SrcOpIdx2);
+
+ // TODO: Commuting the 1st operand of FMA*_Int requires some additional
+ // analysis. The commute optimization is legal only if all users of FMA*_Int
+  // use only the lowest element of the FMA*_Int instruction. Such an analysis
+  // is not implemented yet, so just return 0 in that case.
+  // When that analysis becomes available, this will be the right place to
+  // call it.
+ if (IsIntrinOpcode && SrcOpIdx1 == 1)
+ return 0;
+
+ unsigned Case;
+ if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2)
+ Case = 0;
+ else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3)
+ Case = 1;
+ else if (SrcOpIdx1 == 2 && SrcOpIdx2 == 3)
+ Case = 2;
+ else
+ return 0;
+
+ // Define the FMA forms mapping array that helps to map input FMA form
+ // to output FMA form to preserve the operation semantics after
+ // commuting the operands.
+ static const unsigned FormMapping[][3] = {
+ // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
+ // FMA132 A, C, b; ==> FMA231 C, A, b;
+ // FMA213 B, A, c; ==> FMA213 A, B, c;
+ // FMA231 C, A, b; ==> FMA132 A, C, b;
+ { Form231Index, Form213Index, Form132Index },
+ // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
+ // FMA132 A, c, B; ==> FMA132 B, c, A;
+ // FMA213 B, a, C; ==> FMA231 C, a, B;
+ // FMA231 C, a, B; ==> FMA213 B, a, C;
+ { Form132Index, Form231Index, Form213Index },
+ // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
+ // FMA132 a, C, B; ==> FMA213 a, B, C;
+ // FMA213 b, A, C; ==> FMA132 b, C, A;
+ // FMA231 c, A, B; ==> FMA231 c, B, A;
+ { Form213Index, Form132Index, Form231Index }
+ };
+
+ // Everything is ready, just adjust the FMA opcode and return it.
+ FormIndex = FormMapping[Case][FormIndex];
+ return FoundOpcodesGroup[FormIndex];
}
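
The FormMapping table above can be sanity-checked without LLVM. Treating an FMA3 instruction's sources as (op1, op2, op3) with op1 tied to the destination, form 132 computes op1*op3 + op2, form 213 computes op2*op1 + op3, and form 231 computes op2*op3 + op1. The standalone C++ sketch below (an illustration, not part of the patch) verifies that each row of the mapping preserves the result after swapping the corresponding operand pair:

    // Operands are (op1, op2, op3), with op1 tied to the destination:
    //   form 132: op1*op3 + op2
    //   form 213: op2*op1 + op3
    //   form 231: op2*op3 + op1
    #include <cassert>

    static double f132(double x, double y, double z) { return x * z + y; }
    static double f213(double x, double y, double z) { return y * x + z; }
    static double f231(double x, double y, double z) { return y * z + x; }

    int main() {
      const double a = 2, b = 3, c = 5;
      // Case 0: swap op1/op2 -> 132<->231, 213 unchanged.
      assert(f132(a, c, b) == f231(c, a, b));
      assert(f213(b, a, c) == f213(a, b, c));
      assert(f231(c, a, b) == f132(a, c, b));
      // Case 1: swap op1/op3 -> 132 unchanged, 213<->231.
      assert(f132(a, c, b) == f132(b, c, a));
      assert(f213(b, a, c) == f231(c, a, b));
      assert(f231(c, a, b) == f213(b, a, c));
      // Case 2: swap op2/op3 -> 132<->213, 231 unchanged.
      assert(f132(a, c, b) == f213(a, b, c));
      assert(f213(b, a, c) == f132(b, c, a));
      assert(f231(c, a, b) == f231(c, b, a));
      return 0;
    }
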
-bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
switch (MI->getOpcode()) {
case X86::CMPPDrri:
@@ -3141,46 +3631,22 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
// Ordered/Unordered/Equal/NotEqual tests
unsigned Imm = MI->getOperand(3).getImm() & 0x7;
switch (Imm) {
- case 0x00: // EQUAL
- case 0x03: // UNORDERED
- case 0x04: // NOT EQUAL
- case 0x07: // ORDERED
- SrcOpIdx1 = 1;
- SrcOpIdx2 = 2;
- return true;
+ case 0x00: // EQUAL
+ case 0x03: // UNORDERED
+ case 0x04: // NOT EQUAL
+ case 0x07: // ORDERED
+ // The indices of the commutable operands are 1 and 2.
+ // Assign them to the returned operand indices here.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
}
return false;
}
- case X86::VFMADDPDr231r:
- case X86::VFMADDPSr231r:
- case X86::VFMADDSDr231r:
- case X86::VFMADDSSr231r:
- case X86::VFMSUBPDr231r:
- case X86::VFMSUBPSr231r:
- case X86::VFMSUBSDr231r:
- case X86::VFMSUBSSr231r:
- case X86::VFNMADDPDr231r:
- case X86::VFNMADDPSr231r:
- case X86::VFNMADDSDr231r:
- case X86::VFNMADDSSr231r:
- case X86::VFNMSUBPDr231r:
- case X86::VFNMSUBPSr231r:
- case X86::VFNMSUBSDr231r:
- case X86::VFNMSUBSSr231r:
- case X86::VFMADDPDr231rY:
- case X86::VFMADDPSr231rY:
- case X86::VFMSUBPDr231rY:
- case X86::VFMSUBPSr231rY:
- case X86::VFNMADDPDr231rY:
- case X86::VFNMADDPSr231rY:
- case X86::VFNMSUBPDr231rY:
- case X86::VFNMSUBPSr231rY:
- SrcOpIdx1 = 2;
- SrcOpIdx2 = 3;
- return true;
default:
+ if (isFMA3(MI->getOpcode()))
+ return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
+ return false;
}
static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
@@ -3821,15 +4287,58 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
return 0;
}
-inline static bool MaskRegClassContains(unsigned Reg) {
+static bool MaskRegClassContains(unsigned Reg) {
return X86::VK8RegClass.contains(Reg) ||
X86::VK16RegClass.contains(Reg) ||
X86::VK32RegClass.contains(Reg) ||
X86::VK64RegClass.contains(Reg) ||
X86::VK1RegClass.contains(Reg);
}
+
+static bool GRRegClassContains(unsigned Reg) {
+ return X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg) ||
+ X86::GR16RegClass.contains(Reg) ||
+ X86::GR8RegClass.contains(Reg);
+}
+static
+unsigned copyPhysRegOpcode_AVX512_DQ(unsigned& DestReg, unsigned& SrcReg) {
+ if (MaskRegClassContains(SrcReg) && X86::GR8RegClass.contains(DestReg)) {
+ DestReg = getX86SubSuperRegister(DestReg, 32);
+ return X86::KMOVBrk;
+ }
+ if (MaskRegClassContains(DestReg) && X86::GR8RegClass.contains(SrcReg)) {
+ SrcReg = getX86SubSuperRegister(SrcReg, 32);
+ return X86::KMOVBkr;
+ }
+ return 0;
+}
+
static
-unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
+unsigned copyPhysRegOpcode_AVX512_BW(unsigned& DestReg, unsigned& SrcReg) {
+ if (MaskRegClassContains(SrcReg) && MaskRegClassContains(DestReg))
+ return X86::KMOVQkk;
+ if (MaskRegClassContains(SrcReg) && X86::GR32RegClass.contains(DestReg))
+ return X86::KMOVDrk;
+ if (MaskRegClassContains(SrcReg) && X86::GR64RegClass.contains(DestReg))
+ return X86::KMOVQrk;
+ if (MaskRegClassContains(DestReg) && X86::GR32RegClass.contains(SrcReg))
+ return X86::KMOVDkr;
+ if (MaskRegClassContains(DestReg) && X86::GR64RegClass.contains(SrcReg))
+ return X86::KMOVQkr;
+ return 0;
+}
+
+static
+unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg,
+ const X86Subtarget &Subtarget)
+{
+ if (Subtarget.hasDQI())
+ if (auto Opc = copyPhysRegOpcode_AVX512_DQ(DestReg, SrcReg))
+ return Opc;
+ if (Subtarget.hasBWI())
+ if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg))
+ return Opc;
if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
X86::VR256XRegClass.contains(DestReg, SrcReg) ||
X86::VR512RegClass.contains(DestReg, SrcReg)) {
@@ -3837,21 +4346,14 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
SrcReg = get512BitSuperRegister(SrcReg);
return X86::VMOVAPSZrr;
}
- if (MaskRegClassContains(DestReg) &&
- MaskRegClassContains(SrcReg))
+ if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg))
return X86::KMOVWkk;
- if (MaskRegClassContains(DestReg) &&
- (X86::GR32RegClass.contains(SrcReg) ||
- X86::GR16RegClass.contains(SrcReg) ||
- X86::GR8RegClass.contains(SrcReg))) {
- SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32);
+ if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) {
+ SrcReg = getX86SubSuperRegister(SrcReg, 32);
return X86::KMOVWkr;
}
- if ((X86::GR32RegClass.contains(DestReg) ||
- X86::GR16RegClass.contains(DestReg) ||
- X86::GR8RegClass.contains(DestReg)) &&
- MaskRegClassContains(SrcReg)) {
- DestReg = getX86SubSuperRegister(DestReg, MVT::i32);
+ if (GRRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) {
+ DestReg = getX86SubSuperRegister(DestReg, 32);
return X86::KMOVWrk;
}
return 0;
@@ -3886,7 +4388,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else if (HasAVX512)
- Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg);
+ Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget);
else if (X86::VR128RegClass.contains(DestReg, SrcReg))
Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
@@ -3900,34 +4402,86 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Moving EFLAGS to / from another register requires a push and a pop.
- // Notice that we have to adjust the stack if we don't want to clobber the
- // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
- if (SrcReg == X86::EFLAGS) {
- if (X86::GR64RegClass.contains(DestReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSHF64));
- BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
+ bool FromEFLAGS = SrcReg == X86::EFLAGS;
+ bool ToEFLAGS = DestReg == X86::EFLAGS;
+ int Reg = FromEFLAGS ? DestReg : SrcReg;
+ bool is32 = X86::GR32RegClass.contains(Reg);
+ bool is64 = X86::GR64RegClass.contains(Reg);
+
+ if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
+ int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
+ int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
+ int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
+ int Pop = is64 ? X86::POP64r : X86::POP32r;
+ int PopF = is64 ? X86::POPF64 : X86::POPF32;
+ int AX = is64 ? X86::RAX : X86::EAX;
+
+ if (!Subtarget.hasLAHFSAHF()) {
+ assert(Subtarget.is64Bit() &&
+ "Not having LAHF/SAHF only happens on 64-bit.");
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - usesTheStack.
+ if (FromEFLAGS) {
+ BuildMI(MBB, MI, DL, get(PushF));
+ BuildMI(MBB, MI, DL, get(Pop), DestReg);
+ }
+ if (ToEFLAGS) {
+ BuildMI(MBB, MI, DL, get(Push))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(PopF));
+ }
return;
}
- if (X86::GR32RegClass.contains(DestReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSHF32));
- BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
- return;
+
+ // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
+ // inefficient. Instead:
+ // - Save the overflow flag OF into AL using SETO, and restore it using a
+ // signed 8-bit addition of AL and INT8_MAX.
+ // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH
+ // using LAHF/SAHF.
+ // - When RAX/EAX is live and isn't the destination register, make sure it
+ // isn't clobbered by PUSH/POP'ing it before and after saving/restoring
+ // the flags.
+ // This approach is ~2.25x faster than using PUSHF/POPF.
+ //
+ // This is still somewhat inefficient because we don't know which flags are
+ // actually live inside EFLAGS. Were we able to do a single SETcc instead of
+ // SETO+LAHF / ADDB+SAHF, the code could be 1.02x faster.
+ //
+ // PUSHF/POPF is also potentially incorrect because it affects other flags
+ // such as TF/IF/DF, which LLVM doesn't model.
+ //
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - usesTheStack.
+
+
+ bool AXDead = (Reg == AX) ||
+ (MachineBasicBlock::LQR_Dead ==
+ MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI));
+ if (!AXDead) {
+ // FIXME: If computeRegisterLiveness() reported LQR_Unknown then AX may
+ // actually be dead. This is not a problem for correctness as we are just
+ // (unnecessarily) saving+restoring a dead register. However the
+ // MachineVerifier expects operands that read from dead registers
+ // to be marked with the "undef" flag.
+ BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true));
}
- }
- if (DestReg == X86::EFLAGS) {
- if (X86::GR64RegClass.contains(SrcReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSH64r))
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(X86::POPF64));
- return;
+ if (FromEFLAGS) {
+ BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL);
+ BuildMI(MBB, MI, DL, get(X86::LAHF));
+ BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX);
}
- if (X86::GR32RegClass.contains(SrcReg)) {
- BuildMI(MBB, MI, DL, get(X86::PUSH32r))
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, MI, DL, get(X86::POPF32));
- return;
+ if (ToEFLAGS) {
+ BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL)
+ .addReg(X86::AL)
+ .addImm(INT8_MAX);
+ BuildMI(MBB, MI, DL, get(X86::SAHF));
}
+ if (!AXDead)
+ BuildMI(MBB, MI, DL, get(Pop), AX);
+ return;
}
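For reference, here is a minimal standalone sketch of the SETO/LAHF save and ADDB/SAHF restore idiom described in the comment above, written as GCC/Clang extended inline asm for x86-64. The helper names are illustrative and not taken from the patch; this only shows the instruction sequence, not a supported way to carry live flags across C statements.

#include <cstdint>

// Save OF into AL with SETO and CF/PF/AF/ZF/SF into AH with LAHF; the pair
// comes back in AX.  The compiler gives no guarantee that flags produced by
// surrounding C code survive into this asm statement; sequence shown only.
static inline uint16_t saveFlagsSketch() {
  uint16_t AX;
  asm volatile("seto %%al\n\t"
               "lahf"
               : "=a"(AX));
  return AX;
}

// Restore: adding INT8_MAX (0x7f) to AL sets OF exactly when AL (the SETO
// result) was 1, then SAHF reloads CF/PF/AF/ZF/SF from AH.
static inline void restoreFlagsSketch(uint16_t AX) {
  asm volatile("addb $0x7f, %%al\n\t"
               "sahf"
               : "+a"(AX)
               :
               : "cc");
}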
DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
@@ -4602,9 +5156,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// live-out. If it is live-out, do not optimize.
if ((IsCmpZero || IsSwapped) && !IsSafe) {
MachineBasicBlock *MBB = CmpInstr->getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *Successor : MBB->successors())
+ if (Successor->isLiveIn(X86::EFLAGS))
return false;
}
@@ -4645,8 +5198,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
CmpInstr->eraseFromParent();
// Modify the condition code of instructions in OpsToUpdate.
- for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
- OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
+ for (auto &Op : OpsToUpdate)
+ Op.first->setDesc(get(Op.second));
return true;
}
@@ -4694,8 +5247,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI,
return nullptr;
// Check whether we can fold the def into SrcOperandId.
- MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI);
- if (FoldMI) {
+ if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI)) {
FoldAsLoadDefReg = 0;
return FoldMI;
}
@@ -4725,6 +5277,82 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
return true;
}
+/// Expand a single-def pseudo instruction to a two-addr
+/// instruction with two %k0 reads.
+/// This is used for mapping:
+/// %k4 = K_SET1
+/// to:
+/// %k4 = KXNORrr %k0, %k0
+static bool Expand2AddrKreg(MachineInstrBuilder &MIB,
+ const MCInstrDesc &Desc, unsigned Reg) {
+ assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
+ MIB->setDesc(Desc);
+ MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ return true;
+}
+
+static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
+ bool MinusOne) {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ unsigned Reg = MIB->getOperand(0).getReg();
+
+ // Insert the XOR.
+ BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+
+ // Turn the pseudo into an INC or DEC.
+ MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
+ MIB.addReg(Reg);
+
+ return true;
+}
+
+bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ int64_t Imm = MIB->getOperand(1).getImm();
+ assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
+ MachineBasicBlock::iterator I = MIB.getInstr();
+
+ int StackAdjustment;
+
+ if (Subtarget.is64Bit()) {
+ assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
+ MIB->getOpcode() == X86::MOV32ImmSExti8);
+ // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
+ // widen the register if necessary.
+ StackAdjustment = 8;
+ BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
+ MIB->setDesc(get(X86::POP64r));
+ MIB->getOperand(0)
+ .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
+ } else {
+ assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
+ StackAdjustment = 4;
+ BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
+ MIB->setDesc(get(X86::POP32r));
+ }
+
+ // Build CFI if necessary.
+ MachineFunction &MF = *MBB.getParent();
+ const X86FrameLowering *TFL = Subtarget.getFrameLowering();
+ bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+ bool NeedsDwarfCFI =
+ !IsWin64Prologue &&
+ (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
+ bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
+ if (EmitCFI) {
+ TFL->BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
+ TFL->BuildCFI(MBB, std::next(I), DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
+ }
+
+ return true;
+}
+
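The size win the MOV32ImmSExti8/MOV64ImmSExti8 expansion above is chasing comes from the encodings: a 32-bit mov with an imm32 payload is 5 bytes (roughly 7 for the sign-extended 64-bit form), while push-imm8 plus pop-reg is about 3. A hedged inline-asm sketch of the same idiom, x86-64 only, with an illustrative function name:

#include <cstdint>

// Materialize the sign-extended constant -1 through a push/pop pair, as the
// expansion above does, instead of a mov carrying a 4-byte immediate.
static inline int64_t pushPopImmSketch() {
  int64_t V;
  asm("pushq $-1\n\t"   // 2 bytes: 6A FF
      "popq  %0"        // 1 byte when a legacy register is chosen
      : "=r"(V));
  return V;             // V == -1
}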
// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@@ -4735,8 +5363,8 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
- MachineMemOperand *MMO = MBB.getParent()->
- getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8);
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 8, 8);
MachineBasicBlock::iterator I = MIB.getInstr();
BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1)
@@ -4753,6 +5381,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
switch (MI->getOpcode()) {
case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+ case X86::MOV32r1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
+ case X86::MOV32r_1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
+ case X86::MOV32ImmSExti8:
+ case X86::MOV64ImmSExti8:
+ return ExpandMOVImmSExti8(MIB);
case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
@@ -4777,10 +5412,22 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
+
+ // KNL does not recognize dependency-breaking idioms for mask registers,
+ // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
+ // Using %k0 as the undef input register is a performance heuristic based
+ // on the assumption that %k0 is used less frequently than the other mask
+ // registers, since it is not usable as a write mask.
+ // FIXME: A more advanced approach would be to choose the best input mask
+ // register based on context.
case X86::KSET0B:
- case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
+ case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
+ case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
+ case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
case X86::KSET1B:
- case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
+ case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
+ case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
+ case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
case TargetOpcode::LOAD_STACK_GUARD:
expandLoadStackGuard(MIB, *this);
return true;
@@ -4788,12 +5435,28 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return false;
}
-static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) {
+static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
+ int PtrOffset = 0) {
unsigned NumAddrOps = MOs.size();
- for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
- if (NumAddrOps < 4) // FrameIndex only
- addOffset(MIB, 0);
+
+ if (NumAddrOps < 4) {
+ // FrameIndex only - add an immediate offset (whether it's zero or not).
+ for (unsigned i = 0; i != NumAddrOps; ++i)
+ MIB.addOperand(MOs[i]);
+ addOffset(MIB, PtrOffset);
+ } else {
+ // General Memory Addressing - we need to add any offset to an existing
+ // offset.
+ assert(MOs.size() == 5 && "Unexpected memory operand list length");
+ for (unsigned i = 0; i != NumAddrOps; ++i) {
+ const MachineOperand &MO = MOs[i];
+ if (i == 3 && PtrOffset != 0) {
+ MIB.addDisp(MO, PtrOffset);
+ } else {
+ MIB.addOperand(MO);
+ }
+ }
+ }
}
static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
@@ -4828,7 +5491,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
unsigned OpNo, ArrayRef<MachineOperand> MOs,
MachineBasicBlock::iterator InsertPt,
- MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineInstr *MI, const TargetInstrInfo &TII,
+ int PtrOffset = 0) {
// Omit the implicit operands, something BuildMI can't do.
MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
MI->getDebugLoc(), true);
@@ -4838,7 +5502,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI->getOperand(i);
if (i == OpNo) {
assert(MO.isReg() && "Expected to fold into reg operand!");
- addOperands(MIB, MOs);
+ addOperands(MIB, MOs, PtrOffset);
} else {
MIB.addOperand(MO);
}
@@ -4860,6 +5524,40 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
return MIB.addImm(0);
}
+MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
+ MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
+ ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
+ unsigned Size, unsigned Align) const {
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ case X86::VINSERTPSrr:
+ // Attempt to convert the load of the inserted vector into a folded load
+ // of a single float.
+ if (OpNum == 2) {
+ unsigned Imm = MI->getOperand(MI->getNumOperands() - 1).getImm();
+ unsigned ZMask = Imm & 15;
+ unsigned DstIdx = (Imm >> 4) & 3;
+ unsigned SrcIdx = (Imm >> 6) & 3;
+
+ unsigned RCSize = getRegClass(MI->getDesc(), OpNum, &RI, MF)->getSize();
+ if (Size <= RCSize && 4 <= Align) {
+ int PtrOffset = SrcIdx * 4;
+ unsigned NewImm = (DstIdx << 4) | ZMask;
+ unsigned NewOpCode =
+ (MI->getOpcode() == X86::VINSERTPSrr ? X86::VINSERTPSrm
+ : X86::INSERTPSrm);
+ MachineInstr *NewMI =
+ FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
+ NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
+ return NewMI;
+ }
+ }
+ break;
+ };
+
+ return nullptr;
+}
+
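A small standalone sketch of the immediate rewrite that foldMemoryOperandCustom performs for (V)INSERTPSrr above: the source-lane field picks which 4-byte element to load, and the memory form keeps only the destination lane and zero mask. The struct and function names below are illustrative, not part of the patch.

#include <cstdint>

struct InsertPSFoldSketch {
  int PtrOffset;     // byte offset of the selected float within the load
  uint8_t NewImm;    // immediate for the memory form (SrcIdx dropped)
};

static inline InsertPSFoldSketch foldInsertPSImmSketch(uint8_t Imm) {
  uint8_t ZMask  = Imm & 0xf;         // bits [3:0]: lanes to zero
  uint8_t DstIdx = (Imm >> 4) & 0x3;  // bits [5:4]: destination lane
  uint8_t SrcIdx = (Imm >> 6) & 0x3;  // bits [7:6]: source lane, now folded
  return { SrcIdx * 4, static_cast<uint8_t>((DstIdx << 4) | ZMask) };
}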
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr *MI, unsigned OpNum,
ArrayRef<MachineOperand> MOs, MachineBasicBlock::iterator InsertPt,
@@ -4869,10 +5567,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
bool isCallRegIndirect = Subtarget.callRegIndirect();
bool isTwoAddrFold = false;
- // For CPUs that favor the register form of a call,
- // do not fold loads into calls.
- if (isCallRegIndirect &&
- (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r))
+ // For CPUs that favor the register form of a call or push,
+ // do not fold loads into calls or pushes, unless optimizing for size
+ // aggressively.
+ if (isCallRegIndirect && !MF.getFunction()->optForMinSize() &&
+ (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r ||
+ MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r ||
+ MI->getOpcode() == X86::PUSH64r))
return nullptr;
unsigned NumOps = MI->getDesc().getNumOperands();
@@ -4886,6 +5587,12 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
return nullptr;
MachineInstr *NewMI = nullptr;
+
+ // Attempt to fold any custom cases we have.
+ if (MachineInstr *CustomMI =
+ foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align))
+ return CustomMI;
+
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
@@ -4963,60 +5670,56 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// If the instruction and target operand are commutable, commute the
// instruction and try again.
if (AllowCommute) {
- unsigned OriginalOpIdx = OpNum, CommuteOpIdx1, CommuteOpIdx2;
+ unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex;
if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) {
bool HasDef = MI->getDesc().getNumDefs();
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg();
unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg();
- bool Tied0 =
- 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
bool Tied1 =
+ 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO);
+ bool Tied2 =
0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO);
// If either of the commutable operands are tied to the destination
// then we can not commute + fold.
- if ((HasDef && Reg0 == Reg1 && Tied0) ||
- (HasDef && Reg0 == Reg2 && Tied1))
+ if ((HasDef && Reg0 == Reg1 && Tied1) ||
+ (HasDef && Reg0 == Reg2 && Tied2))
return nullptr;
- if ((CommuteOpIdx1 == OriginalOpIdx) ||
- (CommuteOpIdx2 == OriginalOpIdx)) {
- MachineInstr *CommutedMI = commuteInstruction(MI, false);
- if (!CommutedMI) {
- // Unable to commute.
- return nullptr;
- }
- if (CommutedMI != MI) {
- // New instruction. We can't fold from this.
- CommutedMI->eraseFromParent();
- return nullptr;
- }
+ MachineInstr *CommutedMI =
+ commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
+ if (!CommutedMI) {
+ // Unable to commute.
+ return nullptr;
+ }
+ if (CommutedMI != MI) {
+ // New instruction. We can't fold from this.
+ CommutedMI->eraseFromParent();
+ return nullptr;
+ }
- // Attempt to fold with the commuted version of the instruction.
- unsigned CommuteOp =
- (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1);
- NewMI =
- foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align,
- /*AllowCommute=*/false);
- if (NewMI)
- return NewMI;
-
- // Folding failed again - undo the commute before returning.
- MachineInstr *UncommutedMI = commuteInstruction(MI, false);
- if (!UncommutedMI) {
- // Unable to commute.
- return nullptr;
- }
- if (UncommutedMI != MI) {
- // New instruction. It doesn't need to be kept.
- UncommutedMI->eraseFromParent();
- return nullptr;
- }
+ // Attempt to fold with the commuted version of the instruction.
+ NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt,
+ Size, Align, /*AllowCommute=*/false);
+ if (NewMI)
+ return NewMI;
- // Return here to prevent duplicate fuse failure report.
+ // Folding failed again - undo the commute before returning.
+ MachineInstr *UncommutedMI =
+ commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2);
+ if (!UncommutedMI) {
+ // Unable to commute.
return nullptr;
}
+ if (UncommutedMI != MI) {
+ // New instruction. It doesn't need to be kept.
+ UncommutedMI->eraseFromParent();
+ return nullptr;
+ }
+
+ // Return here to prevent duplicate fuse failure report.
+ return nullptr;
}
}
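A hedged usage sketch of the commute-then-fold query used above: the operand being folded is pinned in CommuteOpIdx1 and CommuteOpIdx2 is left as CommuteAnyOperandIndex so findCommutedOpIndices may pick any partner. The wrapper below is illustrative (its name and the include paths reflect the tree at this revision), not code from the patch.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Ask whether some operand of MI can be commuted with operand OpNum and, if
// so, commute the instruction in place.  Returns nullptr when no commutable
// partner exists or the commute itself fails.
static MachineInstr *tryCommuteWithOperand(const TargetInstrInfo &TII,
                                           MachineInstr *MI, unsigned OpNum) {
  unsigned Idx1 = OpNum;
  unsigned Idx2 = TargetInstrInfo::CommuteAnyOperandIndex;
  if (!TII.findCommutedOpIndices(MI, Idx1, Idx2))
    return nullptr;
  // Idx2 now names a concrete partner operand; commute without creating a
  // new MachineInstr (NewMI == false).
  return TII.commuteInstruction(MI, /*NewMI=*/false, Idx1, Idx2);
}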
@@ -5208,13 +5911,14 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
// If MI kills this register, the false dependence is already broken.
if (MI->killsRegister(Reg, TRI))
return;
+
if (X86::VR128RegClass.contains(Reg)) {
// These instructions are all floating point domain, so xorps is the best
// choice.
- bool HasAVX = Subtarget.hasAVX();
- unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+ unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ MI->addRegisterKilled(Reg, TRI, true);
} else if (X86::VR256RegClass.contains(Reg)) {
// Use vxorps to clear the full ymm register.
// It wants to read and write the xmm sub-register.
@@ -5222,21 +5926,20 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
.addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
.addReg(Reg, RegState::ImplicitDefine);
- } else
- return;
- MI->addRegisterKilled(Reg, TRI, true);
+ MI->addRegisterKilled(Reg, TRI, true);
+ }
}
MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
// Check switch flag
- if (NoFusing) return nullptr;
+ if (NoFusing)
+ return nullptr;
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
- if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
- hasPartialRegUpdate(MI->getOpcode()))
+ if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
return nullptr;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -5303,6 +6006,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+ case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
+ case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
+ case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
+ case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
+ case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
+ case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
return false;
default:
return true;
@@ -5318,6 +6027,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+ case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
+ case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
+ case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
+ case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
+ case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
+ case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
return false;
default:
return true;
@@ -5342,10 +6057,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
// Check switch flag
if (NoFusing) return nullptr;
- // Unless optimizing for size, don't fold to avoid partial
- // register update stalls
- if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
- hasPartialRegUpdate(MI->getOpcode()))
+ // Avoid partial register update stalls unless optimizing for size.
+ if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
return nullptr;
// Determine the alignment of the load.
@@ -5460,62 +6173,6 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
/*Size=*/0, Alignment, /*AllowCommute=*/true);
}
-bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- ArrayRef<unsigned> Ops) const {
- // Check switch flag
- if (NoFusing) return 0;
-
- if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
- switch (MI->getOpcode()) {
- default: return false;
- case X86::TEST8rr:
- case X86::TEST16rr:
- case X86::TEST32rr:
- case X86::TEST64rr:
- return true;
- case X86::ADD32ri:
- // FIXME: AsmPrinter doesn't know how to handle
- // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
- if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
- return false;
- break;
- }
- }
-
- if (Ops.size() != 1)
- return false;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- unsigned NumOps = MI->getDesc().getNumOperands();
- bool isTwoAddr = NumOps > 1 &&
- MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
-
- // Folding a memory location into the two-address part of a two-address
- // instruction is different than folding it other places. It requires
- // replacing the *two* registers with the memory location.
- const DenseMap<unsigned,
- std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
- if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
- OpcodeTablePtr = &RegOp2MemOpTable2Addr;
- } else if (OpNum == 0) {
- if (Opc == X86::MOV32r0)
- return true;
-
- OpcodeTablePtr = &RegOp2MemOpTable0;
- } else if (OpNum == 1) {
- OpcodeTablePtr = &RegOp2MemOpTable1;
- } else if (OpNum == 2) {
- OpcodeTablePtr = &RegOp2MemOpTable2;
- } else if (OpNum == 3) {
- OpcodeTablePtr = &RegOp2MemOpTable3;
- }
-
- if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
- return true;
- return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
-}
-
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
@@ -5536,9 +6193,10 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const MCInstrDesc &MCID = get(Opc);
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+ // TODO: Check if 32-byte or greater accesses are slow too?
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
- !Subtarget.isUnalignedMemAccessFast())
+ Subtarget.isUnalignedMem16Slow())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for
// performance.
@@ -5582,20 +6240,19 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
- for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
- MIB.addOperand(BeforeOps[i]);
+ for (MachineOperand &BeforeOp : BeforeOps)
+ MIB.addOperand(BeforeOp);
if (FoldedLoad)
MIB.addReg(Reg);
- for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
- MIB.addOperand(AfterOps[i]);
- for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
- MachineOperand &MO = ImpOps[i];
- MIB.addReg(MO.getReg(),
- getDefRegState(MO.isDef()) |
+ for (MachineOperand &AfterOp : AfterOps)
+ MIB.addOperand(AfterOp);
+ for (MachineOperand &ImpOp : ImpOps) {
+ MIB.addReg(ImpOp.getReg(),
+ getDefRegState(ImpOp.isDef()) |
RegState::Implicit |
- getKillRegState(MO.isKill()) |
- getDeadRegState(MO.isDead()) |
- getUndefRegState(MO.isUndef()));
+ getKillRegState(ImpOp.isKill()) |
+ getDeadRegState(ImpOp.isDead()) |
+ getUndefRegState(ImpOp.isUndef()));
}
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
switch (DataMI->getOpcode()) {
@@ -5686,9 +6343,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !Subtarget.isUnalignedMemAccessFast())
+ Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned load.
return false;
+ // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+ // memory access is slow above.
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
@@ -5729,9 +6388,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !Subtarget.isUnalignedMemAccessFast())
+ Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned store.
return false;
+ // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+ // memory access is slow above.
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
@@ -6192,16 +6853,16 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
// domains, but they require a bit more work than just switching opcodes.
static const uint16_t *lookup(unsigned opcode, unsigned domain) {
- for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
- if (ReplaceableInstrs[i][domain-1] == opcode)
- return ReplaceableInstrs[i];
+ for (const uint16_t (&Row)[3] : ReplaceableInstrs)
+ if (Row[domain-1] == opcode)
+ return Row;
return nullptr;
}
static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
- for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
- if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
- return ReplaceableInstrsAVX2[i];
+ for (const uint16_t (&Row)[3] : ReplaceableInstrsAVX2)
+ if (Row[domain-1] == opcode)
+ return Row;
return nullptr;
}
@@ -6347,230 +7008,181 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel,
return isHighLatencyDef(DefMI->getOpcode());
}
-static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst,
- const MachineBasicBlock *MBB) {
- assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators");
- const MachineOperand &Op1 = Inst.getOperand(1);
- const MachineOperand &Op2 = Inst.getOperand(2);
- const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- // We need virtual register definitions.
- MachineInstr *MI1 = nullptr;
- MachineInstr *MI2 = nullptr;
- if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
- MI1 = MRI.getUniqueVRegDef(Op1.getReg());
- if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
- MI2 = MRI.getUniqueVRegDef(Op2.getReg());
-
- // And they need to be in the trace (otherwise, they won't have a depth).
- if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB)
- return true;
+bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) const {
+ assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) &&
+ "Reassociation needs binary operators");
+
+ // Integer binary math/logic instructions have a third source operand:
+ // the EFLAGS register. That operand must be both defined here and never
+ // used; i.e., it must be dead. If the EFLAGS operand is live, then we can
+ // not change anything because rearranging the operands could affect other
+ // instructions that depend on the exact status flags (zero, sign, etc.)
+ // that are set by using these particular operands with this operation.
+ if (Inst.getNumOperands() == 4) {
+ assert(Inst.getOperand(3).isReg() &&
+ Inst.getOperand(3).getReg() == X86::EFLAGS &&
+ "Unexpected operand in reassociable instruction");
+ if (!Inst.getOperand(3).isDead())
+ return false;
+ }
- return false;
-}
-
-static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
- const MachineBasicBlock *MBB = Inst.getParent();
- const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg());
- MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg());
- unsigned AssocOpcode = Inst.getOpcode();
-
- // If only one operand has the same opcode and it's the second source operand,
- // the operands must be commuted.
- Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode;
- if (Commuted)
- std::swap(MI1, MI2);
-
- // 1. The previous instruction must be the same type as Inst.
- // 2. The previous instruction must have virtual register definitions for its
- // operands in the same basic block as Inst.
- // 3. The previous instruction's result must only be used by Inst.
- if (MI1->getOpcode() == AssocOpcode &&
- hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
- MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
- return true;
-
- return false;
+ return TargetInstrInfo::hasReassociableOperands(Inst, MBB);
}
// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
-// 2. Other math / logic operations (and, or)
-static bool isAssociativeAndCommutative(unsigned Opcode) {
- switch (Opcode) {
+// 2. Other math / logic operations (xor, or)
+// 3. Other forms of the same operation (intrinsics and other variants)
+bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ case X86::AND8rr:
+ case X86::AND16rr:
+ case X86::AND32rr:
+ case X86::AND64rr:
+ case X86::OR8rr:
+ case X86::OR16rr:
+ case X86::OR32rr:
+ case X86::OR64rr:
+ case X86::XOR8rr:
+ case X86::XOR16rr:
+ case X86::XOR32rr:
+ case X86::XOR64rr:
+ case X86::IMUL16rr:
+ case X86::IMUL32rr:
+ case X86::IMUL64rr:
+ case X86::PANDrr:
+ case X86::PORrr:
+ case X86::PXORrr:
+ case X86::VPANDrr:
+ case X86::VPANDYrr:
+ case X86::VPORrr:
+ case X86::VPORYrr:
+ case X86::VPXORrr:
+ case X86::VPXORYrr:
+ // Normal min/max instructions are not commutative because of NaN and signed
+ // zero semantics, but these are. Thus, there's no need to check for global
+ // relaxed math; the instructions themselves have the properties we need.
+ case X86::MAXCPDrr:
+ case X86::MAXCPSrr:
+ case X86::MAXCSDrr:
+ case X86::MAXCSSrr:
+ case X86::MINCPDrr:
+ case X86::MINCPSrr:
+ case X86::MINCSDrr:
+ case X86::MINCSSrr:
+ case X86::VMAXCPDrr:
+ case X86::VMAXCPSrr:
+ case X86::VMAXCPDYrr:
+ case X86::VMAXCPSYrr:
+ case X86::VMAXCSDrr:
+ case X86::VMAXCSSrr:
+ case X86::VMINCPDrr:
+ case X86::VMINCPSrr:
+ case X86::VMINCPDYrr:
+ case X86::VMINCPSYrr:
+ case X86::VMINCSDrr:
+ case X86::VMINCSSrr:
+ return true;
+ case X86::ADDPDrr:
+ case X86::ADDPSrr:
case X86::ADDSDrr:
case X86::ADDSSrr:
- case X86::VADDSDrr:
- case X86::VADDSSrr:
+ case X86::MULPDrr:
+ case X86::MULPSrr:
case X86::MULSDrr:
case X86::MULSSrr:
+ case X86::VADDPDrr:
+ case X86::VADDPSrr:
+ case X86::VADDPDYrr:
+ case X86::VADDPSYrr:
+ case X86::VADDSDrr:
+ case X86::VADDSSrr:
+ case X86::VMULPDrr:
+ case X86::VMULPSrr:
+ case X86::VMULPDYrr:
+ case X86::VMULPSYrr:
case X86::VMULSDrr:
case X86::VMULSSrr:
- return true;
+ return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
default:
return false;
}
}
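The machine-combiner pattern these hooks feed (B = A op X; C = B op Y, reassociated into B = X op Y; C = A op B) can be pictured at the source level; the integer forms qualify only because the dead-EFLAGS requirement above guarantees nothing observes the intermediate flags. The functions below are an illustrative analogue, not LLVM code:

// Before: the two adds form a serial chain behind 'a'.
static inline long chainBefore(long a, long x, long y) {
  long b = a + x;   // B = A op X   (Prev)
  return b + y;     // C = B op Y   (Root)
}

// After reassociation: x + y no longer waits on 'a', shortening the
// critical path when 'a' is the late-arriving value.
static inline long chainAfter(long a, long x, long y) {
  long b = x + y;   // B = X op Y
  return a + b;     // C = A op B
}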
-/// Return true if the input instruction is part of a chain of dependent ops
-/// that are suitable for reassociation, otherwise return false.
-/// If the instruction's operands must be commuted to have a previous
-/// instruction of the same type define the first source operand, Commuted will
-/// be set to true.
-static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
- // 1. The operation must be associative and commutative.
- // 2. The instruction must have virtual register definitions for its
- // operands in the same basic block.
- // 3. The instruction must have a reassociable sibling.
- if (isAssociativeAndCommutative(Inst.getOpcode()) &&
- hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
- hasReassocSibling(Inst, Commuted))
- return true;
-
- return false;
-}
-
-// FIXME: This has the potential to be expensive (compile time) while not
-// improving the code at all. Some ways to limit the overhead:
-// 1. Track successful transforms; bail out if hit rate gets too low.
-// 2. Only enable at -O3 or some other non-default optimization level.
-// 3. Pre-screen pattern candidates here: if an operand of the previous
-// instruction is known to not increase the critical path, then don't match
-// that pattern.
-bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
- if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
- return false;
-
- // TODO: There is nothing x86-specific here except the instruction type.
- // This logic could be hoisted into the machine combiner pass itself.
-
- // Look for this reassociation pattern:
- // B = A op X (Prev)
- // C = B op Y (Root)
-
- bool Commute;
- if (isReassocCandidate(Root, Commute)) {
- // We found a sequence of instructions that may be suitable for a
- // reassociation of operands to increase ILP. Specify each commutation
- // possibility for the Prev instruction in the sequence and let the
- // machine combiner decide if changing the operands is worthwhile.
- if (Commute) {
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
- } else {
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
- Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
- }
- return true;
- }
+/// This is an architecture-specific helper function of reassociateOps.
+/// Set special operand attributes for new instructions after reassociation.
+void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+ MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const {
+ // Integer instructions define an implicit EFLAGS source register operand as
+ // the third source (fourth total) operand.
+ if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
+ return;
- return false;
+ assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
+ "Unexpected instruction type for reassociation");
+
+ MachineOperand &OldOp1 = OldMI1.getOperand(3);
+ MachineOperand &OldOp2 = OldMI2.getOperand(3);
+ MachineOperand &NewOp1 = NewMI1.getOperand(3);
+ MachineOperand &NewOp2 = NewMI2.getOperand(3);
+
+ assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
+ "Must have dead EFLAGS operand in reassociable instruction");
+ assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
+ "Must have dead EFLAGS operand in reassociable instruction");
+
+ (void)OldOp1;
+ (void)OldOp2;
+
+ assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
+ "Unexpected operand in reassociable instruction");
+ assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
+ "Unexpected operand in reassociable instruction");
+
+ // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
+ // of this pass or other passes. The EFLAGS operands must be dead in these new
+ // instructions because the EFLAGS operands in the original instructions must
+ // be dead in order for reassociation to occur.
+ NewOp1.setIsDead();
+ NewOp2.setIsDead();
}
-/// Attempt the following reassociation to reduce critical path length:
-/// B = A op X (Prev)
-/// C = B op Y (Root)
-/// ===>
-/// B = X op Y
-/// C = A op B
-static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
- MachineCombinerPattern::MC_PATTERN Pattern,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
- MachineFunction *MF = Root.getParent()->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
-
- // This array encodes the operand index for each parameter because the
- // operands may be commuted. Each row corresponds to a pattern value,
- // and each column specifies the index of A, B, X, Y.
- unsigned OpIdx[4][4] = {
- { 1, 1, 2, 2 },
- { 1, 2, 2, 1 },
- { 2, 1, 1, 2 },
- { 2, 2, 1, 1 }
- };
-
- MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
- MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
- MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
- MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
- MachineOperand &OpC = Root.getOperand(0);
-
- unsigned RegA = OpA.getReg();
- unsigned RegB = OpB.getReg();
- unsigned RegX = OpX.getReg();
- unsigned RegY = OpY.getReg();
- unsigned RegC = OpC.getReg();
-
- if (TargetRegisterInfo::isVirtualRegister(RegA))
- MRI.constrainRegClass(RegA, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegB))
- MRI.constrainRegClass(RegB, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegX))
- MRI.constrainRegClass(RegX, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegY))
- MRI.constrainRegClass(RegY, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegC))
- MRI.constrainRegClass(RegC, RC);
-
- // Create a new virtual register for the result of (X op Y) instead of
- // recycling RegB because the MachineCombiner's computation of the critical
- // path requires a new register definition rather than an existing one.
- unsigned NewVR = MRI.createVirtualRegister(RC);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-
- unsigned Opcode = Root.getOpcode();
- bool KillA = OpA.isKill();
- bool KillX = OpX.isKill();
- bool KillY = OpY.isKill();
-
- // Create new instructions for insertion.
- MachineInstrBuilder MIB1 =
- BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
- .addReg(RegX, getKillRegState(KillX))
- .addReg(RegY, getKillRegState(KillY));
- InsInstrs.push_back(MIB1);
-
- MachineInstrBuilder MIB2 =
- BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
- .addReg(RegA, getKillRegState(KillA))
- .addReg(NewVR, getKillRegState(true));
- InsInstrs.push_back(MIB2);
-
- // Record old instructions for deletion.
- DelInstrs.push_back(&Prev);
- DelInstrs.push_back(&Root);
+std::pair<unsigned, unsigned>
+X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+ return std::make_pair(TF, 0u);
}
-void X86InstrInfo::genAlternativeCodeSequence(
- MachineInstr &Root,
- MachineCombinerPattern::MC_PATTERN Pattern,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
- MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
-
- // Select the previous instruction in the sequence based on the input pattern.
- MachineInstr *Prev = nullptr;
- switch (Pattern) {
- case MachineCombinerPattern::MC_REASSOC_AX_BY:
- case MachineCombinerPattern::MC_REASSOC_XA_BY:
- Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
- break;
- case MachineCombinerPattern::MC_REASSOC_AX_YB:
- case MachineCombinerPattern::MC_REASSOC_XA_YB:
- Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
- }
- assert(Prev && "Unknown pattern for machine combiner");
-
- reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
- return;
+ArrayRef<std::pair<unsigned, const char *>>
+X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+ using namespace X86II;
+ static const std::pair<unsigned, const char *> TargetFlags[] = {
+ {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
+ {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
+ {MO_GOT, "x86-got"},
+ {MO_GOTOFF, "x86-gotoff"},
+ {MO_GOTPCREL, "x86-gotpcrel"},
+ {MO_PLT, "x86-plt"},
+ {MO_TLSGD, "x86-tlsgd"},
+ {MO_TLSLD, "x86-tlsld"},
+ {MO_TLSLDM, "x86-tlsldm"},
+ {MO_GOTTPOFF, "x86-gottpoff"},
+ {MO_INDNTPOFF, "x86-indntpoff"},
+ {MO_TPOFF, "x86-tpoff"},
+ {MO_DTPOFF, "x86-dtpoff"},
+ {MO_NTPOFF, "x86-ntpoff"},
+ {MO_GOTNTPOFF, "x86-gotntpoff"},
+ {MO_DLLIMPORT, "x86-dllimport"},
+ {MO_DARWIN_STUB, "x86-darwin-stub"},
+ {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
+ {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
+ {MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, "x86-darwin-hidden-nonlazy-pic-base"},
+ {MO_TLVP, "x86-tlvp"},
+ {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
+ {MO_SECREL, "x86-secrel"}};
+ return makeArrayRef(TargetFlags);
}
namespace {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index bf63336..9d40334 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -23,22 +23,10 @@
#include "X86GenInstrInfo.inc"
namespace llvm {
+ class MachineInstrBuilder;
class X86RegisterInfo;
class X86Subtarget;
- namespace MachineCombinerPattern {
- enum MC_PATTERN : int {
- // These are commutative variants for reassociating a computation chain
- // of the form:
- // B = A op X (Prev)
- // C = B op Y (Root)
- MC_REASSOC_AX_BY = 0,
- MC_REASSOC_AX_YB = 1,
- MC_REASSOC_XA_BY = 2,
- MC_REASSOC_XA_YB = 3,
- };
- } // end namespace MachineCombinerPattern
-
namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
@@ -259,14 +247,64 @@ public:
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const override;
- /// commuteInstruction - We have a few instructions that must be hacked on to
- /// commute them.
+ /// Returns true iff the routine could find two commutable operands in the
+ /// given machine instruction.
+ /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. Their
+ /// input values can be re-defined in this method only if the input values
+ /// are not pre-defined, which is designated by the special value
+ /// 'CommuteAnyOperandIndex' assigned to them.
+ /// If both indices are pre-defined and refer to some operands, then the
+ /// method simply returns true if the corresponding operands are commutable
+ /// and returns false otherwise.
///
- MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
-
+ /// For example, calling this method this way:
+ /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
+ /// findCommutedOpIndices(MI, Op1, Op2);
+ /// can be interpreted as a query asking to find an operand that would be
+ /// commutable with operand #1.
bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ /// Returns true if the routine could find two commutable operands
+ /// in the given FMA instruction. Otherwise, returns false.
+ ///
+ /// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments.
+ /// The output indices of the commuted operands are returned in these
+ /// arguments. Also, the input values of these arguments may be preset either
+ /// to indices of operands that must be commuted or be equal to a special
+ /// value 'CommuteAnyOperandIndex' which means that the corresponding
+ /// operand index is not set and this method is free to pick any of
+ /// available commutable operands.
+ ///
+ /// For example, calling this method this way:
+ /// unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex;
+ /// findFMA3CommutedOpIndices(MI, Idx1, Idx2);
+ /// can be interpreted as a query asking if operand #1 can be swapped
+ /// with any other available operand (e.g. operand #2, operand #3, etc.).
+ ///
+ /// The returned FMA opcode may differ from the opcode in the given MI.
+ /// For example, commuting the operands #1 and #3 in the following FMA
+ /// FMA213 #1, #2, #3
+ /// results in an instruction with an adjusted opcode:
+ /// FMA231 #3, #2, #1
+ bool findFMA3CommutedOpIndices(MachineInstr *MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
+
+ /// Returns an adjusted FMA opcode that must be used in FMA instruction that
+ /// performs the same computations as the given MI but which has the operands
+ /// \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
+ /// It may return 0 if it is unsafe to commute the operands.
+ ///
+ /// The returned FMA opcode may differ from the opcode in the given \p MI.
+ /// For example, commuting the operands #1 and #3 in the following FMA
+ /// FMA213 #1, #2, #3
+ /// results in an instruction with an adjusted opcode:
+ /// FMA231 #3, #2, #1
+ unsigned getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
+ unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2) const;
+
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr* MI) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -342,11 +380,6 @@ public:
MachineBasicBlock::iterator InsertPt,
MachineInstr *LoadMI) const override;
- /// canFoldMemoryOperand - Returns true if the specified load / store is
- /// folding is possible.
- bool canFoldMemoryOperand(const MachineInstr *,
- ArrayRef<unsigned>) const override;
-
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instructions. If this is
/// possible, returns true as well as the new instructions by reference.
@@ -406,10 +439,9 @@ public:
bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- static bool isX86_64ExtendedReg(const MachineOperand &MO) {
- if (!MO.isReg()) return false;
- return X86II::isX86_64ExtendedReg(MO.getReg());
- }
+ /// True if MI has a condition code def, e.g. EFLAGS, that is
+ /// not marked dead.
+ bool hasLiveCondCodeDef(MachineInstr *MI) const;
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
@@ -452,26 +484,19 @@ public:
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI,
unsigned UseIdx) const override;
-
bool useMachineCombiner() const override {
return true;
}
-
- /// Return true when there is potentially a faster code sequence
- /// for an instruction chain ending in <Root>. All potential patterns are
- /// output in the <Pattern> array.
- bool getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;
-
- /// When getMachineCombinerPatterns() finds a pattern, this function generates
- /// the instructions that could replace the original code sequence.
- void genAlternativeCodeSequence(
- MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
- SmallVectorImpl<MachineInstr *> &InsInstrs,
- SmallVectorImpl<MachineInstr *> &DelInstrs,
- DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+
+ bool hasReassociableOperands(const MachineInstr &Inst,
+ const MachineBasicBlock *MBB) const override;
+
+ void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+ MachineInstr &NewMI1,
+ MachineInstr &NewMI2) const override;
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
@@ -500,16 +525,49 @@ public:
unsigned &FoldAsLoadDefReg,
MachineInstr *&DefMI) const override;
+ std::pair<unsigned, unsigned>
+ decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+ ArrayRef<std::pair<unsigned, const char *>>
+ getSerializableDirectMachineOperandTargetFlags() const override;
+
+protected:
+ /// Commutes the operands in the given instruction by changing the operands
+ /// order and/or changing the instruction's opcode and/or the immediate value
+ /// operand.
+ ///
+ /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
+ /// to be commuted.
+ ///
+ /// Do not call this method for a non-commutable instruction or
+ /// non-commutable operands.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr *MI, bool NewMI,
+ unsigned CommuteOpIdx1,
+ unsigned CommuteOpIdx2) const override;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
+ /// Handles memory folding for special case instructions, for instance those
+ /// requiring custom manipulation of the address.
+ MachineInstr *foldMemoryOperandCustom(MachineFunction &MF, MachineInstr *MI,
+ unsigned OpNum,
+ ArrayRef<MachineOperand> MOs,
+ MachineBasicBlock::iterator InsertPt,
+ unsigned Size, unsigned Align) const;
+
/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const;
+
+ /// Expand the MOVImmSExti8 pseudo-instructions.
+ bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index 52bab9c..f4ca2b8 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -106,8 +106,6 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
-def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
@@ -158,6 +156,8 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
+ [SDNPHasChain, SDNPOptInGlue]>;
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
@@ -250,9 +250,6 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL,
- [SDNPHasChain, SDNPOutGlue]>;
-
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
@@ -344,18 +341,21 @@ def vy64xmem : X86VMemOperand<VR256X, "printi64mem", X86MemVY64XOperand>;
def vz32mem : X86VMemOperand<VR512, "printi32mem", X86MemVZ32Operand>;
def vz64mem : X86VMemOperand<VR512, "printi64mem", X86MemVZ64Operand>;
-// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
-// plain GR64, so that it doesn't potentially require a REX prefix.
-def i8mem_NOREX : Operand<i64> {
+// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
+// of a plain GPR, so that it doesn't potentially require a REX prefix.
+def ptr_rc_norex : PointerLikeRegClass<2>;
+def ptr_rc_norex_nosp : PointerLikeRegClass<3>;
+
+def i8mem_NOREX : Operand<iPTR> {
let PrintMethod = "printi8mem";
- let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
+ let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm, i8imm);
let ParserMatchClass = X86Mem8AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
// GPRs available for tailcall.
// It represents GR32_TC, GR64_TC or GR64_TCW64.
-def ptr_rc_tailcall : PointerLikeRegClass<2>;
+def ptr_rc_tailcall : PointerLikeRegClass<4>;
// Special i32mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
@@ -697,34 +697,34 @@ def lea64mem : Operand<i64> {
// X86 Complex Pattern Definitions.
//
-// Define X86 specific addressing mode.
-def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
-def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
+// Define X86-specific addressing mode.
+def addr : ComplexPattern<iPTR, 5, "selectAddr", [], [SDNPWantParent]>;
+def lea32addr : ComplexPattern<i32, 5, "selectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
// In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
-def lea64_32addr : ComplexPattern<i32, 5, "SelectLEA64_32Addr",
+def lea64_32addr : ComplexPattern<i32, 5, "selectLEA64_32Addr",
[add, sub, mul, X86mul_imm, shl, or,
frameindex, X86WrapperRIP],
[]>;
-def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+def tls32addr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
+def tls32baseaddr : ComplexPattern<i32, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
+def lea64addr : ComplexPattern<i64, 5, "selectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex,
X86WrapperRIP], []>;
-def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+def tls64addr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
+def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
[tglobaltlsaddr], []>;
-def vectoraddr : ComplexPattern<iPTR, 5, "SelectVectorAddr", [],[SDNPWantParent]>;
+def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],[SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
@@ -767,12 +767,21 @@ def HasDQI : Predicate<"Subtarget->hasDQI()">,
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">,
AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
+def NoBWI : Predicate<"!Subtarget->hasBWI()">;
def HasVLX : Predicate<"Subtarget->hasVLX()">,
AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
+def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
+def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
+def PKU : Predicate<"!Subtarget->hasPKU()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasFXSR : Predicate<"Subtarget->hasFXSR()">;
+def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">;
+def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">;
+def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
+def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
def HasFMA : Predicate<"Subtarget->hasFMA()">;
def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
@@ -794,6 +803,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasMPX : Predicate<"Subtarget->hasMPX()">;
@@ -812,6 +822,8 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">,
AssemblerPredicate<"Mode32Bit", "32-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
+ "Subtarget->getFrameLowering()->hasFP(*MF)">;
def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -825,6 +837,7 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
def OptForSize : Predicate<"OptForSize">;
+def OptForMinSize : Predicate<"OptForMinSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
@@ -867,20 +880,54 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{
}]>;
-def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
-def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
-def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
+def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
+def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
+
+// If we have multiple users of an immediate, it's much smaller to reuse
+// the register, rather than encode the immediate in every instruction.
+// This has the risk of increasing register pressure from stretched live
+// ranges, however, the immediates should be trivial to rematerialize by
+// the RA in the event of high register pressure.
+// TODO : This is currently enabled for stores and binary ops. There are more
+// cases for which this can be enabled, though this catches the bulk of the
+// issues.
+// TODO2 : This should really also be enabled under O2, but there's currently
+// an issue with RA where we don't pull the constants into their users
+// when we rematerialize them. I'll follow-up on enabling O2 after we fix that
+// issue.
+// TODO3 : This is currently limited to single basic blocks (DAG creation
+// pulls block immediates to the top and merges them if necessary).
+// Eventually, it would be nice to allow ConstantHoisting to merge constants
+// globally for potentially added savings.
+//
+def imm8_su : PatLeaf<(i8 imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm16_su : PatLeaf<(i16 imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm32_su : PatLeaf<(i32 imm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
-def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
// unsigned field.
-def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
def i64immZExt32SExt8 : ImmLeaf<i64, [{
- return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
+ return isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm));
}]>;
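
isInt<N> and isUInt<N> are the helpers from llvm/Support/MathExtras.h and are equivalent to the cast-and-compare checks they replace here. A small standalone check of the boundary cases (assumes it is built against LLVM's Support headers; the concrete immediates are illustrative only):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  // i16immSExt8 and friends: the value fits in a signed 8-bit field.
  assert(llvm::isInt<8>(127) && llvm::isInt<8>(-128) && !llvm::isInt<8>(128));
  // i64immZExt32: the value fits in an unsigned 32-bit field.
  assert(llvm::isUInt<32>(0xFFFFFFFFULL) && !llvm::isUInt<32>(1ULL << 32));
  // i64immZExt32SExt8: fits in u32 *and* its low 32 bits, read as signed,
  // fit in s8 -- so 0xFFFFFF80 (int32_t -128) still qualifies.
  assert(llvm::isUInt<32>(0xFFFFFF80ULL) &&
         llvm::isInt<8>(static_cast<int32_t>(UINT64_C(0xFFFFFF80))));
  return 0;
}
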
// Helper fragments for loads.
@@ -914,11 +961,12 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
return false;
}]>;
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -1020,12 +1068,8 @@ def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
IIC_PUSH_REG>, OpSize16;
-def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
- IIC_PUSH_MEM>, OpSize16;
def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
-def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
- IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
"push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
@@ -1039,6 +1083,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
} // mayStore, SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
+ IIC_PUSH_MEM>, OpSize16;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
+ IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, mayStore, SchedRW
+
}
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
@@ -1071,9 +1123,11 @@ def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
+} // mayStore, SchedRW
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>;
-} // mayStore, SchedRW
+} // mayLoad, mayStore, SchedRW
}
let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
@@ -1275,13 +1329,13 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
let SchedRW = [WriteStore] in {
def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
- [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
+ [(store (i8 imm8_su:$src), addr:$dst)], IIC_MOV_MEM>;
def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
"mov{w}\t{$src, $dst|$dst, $src}",
- [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16;
+ [(store (i16 imm16_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16;
def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32;
+ [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32;
def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
@@ -1457,10 +1511,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
- [(set EFLAGS, (X86sahf AH))], IIC_AHF>;
+ [(set EFLAGS, (X86sahf AH))], IIC_AHF>,
+ Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
- IIC_AHF>; // AH = flags
+ IIC_AHF>, // AH = flags
+ Requires<[HasLAHFSAHF]>;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -1894,37 +1950,38 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB;
}
// Table lookup instructions
+let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in
def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>,
Sched<[WriteLoad]>;
let SchedRW = [WriteMicrocoded] in {
// ASCII Adjust After Addition
-// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>,
Requires<[Not64BitMode]>;
// ASCII Adjust AX Before Division
-// sets AL, AH and EFLAGS and uses AL and AH
+let Uses = [AX], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
"aad\t$src", [], IIC_AAD>, Requires<[Not64BitMode]>;
// ASCII Adjust AX After Multiply
-// sets AL, AH and EFLAGS and uses AL
+let Uses = [AL], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
"aam\t$src", [], IIC_AAM>, Requires<[Not64BitMode]>;
// ASCII Adjust AL After Subtraction
-// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>,
Requires<[Not64BitMode]>;
// Decimal Adjust AL after Addition
-// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>,
Requires<[Not64BitMode]>;
// Decimal Adjust AL after Subtraction
-// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS
+let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>,
Requires<[Not64BitMode]>;
} // SchedRW
@@ -2357,6 +2414,32 @@ defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>;
} // HasTBM, EFLAGS
//===----------------------------------------------------------------------===//
+// MONITORX/MWAITX Instructions
+//
+let SchedRW = [WriteSystem] in {
+let Uses = [EAX, ECX, EDX] in
+def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", [],
+ IIC_SSE_MONITOR>, TB;
+let Uses = [ECX, EAX, EBX] in
+def MWAITXrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", [], IIC_SSE_MWAIT>,
+ TB;
+} // SchedRW
+
+def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrr)>, Requires<[Not64BitMode]>;
+def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrr)>, Requires<[In64BitMode]>;
+
+def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>,
+ Requires<[Not64BitMode]>;
+def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
+ Requires<[In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// CLZERO Instruction
+//
+let Uses = [EAX] in
+def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
+
+//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
//===----------------------------------------------------------------------===//
@@ -2498,8 +2581,8 @@ def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>;
def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>;
def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"loopz", "loope", "att">;
-def : MnemonicAlias<"loopnz", "loopne", "att">;
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>;
@@ -2532,14 +2615,15 @@ def : MnemonicAlias<"pusha", "pushaw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"popa", "popal", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"pusha", "pushal", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"repe", "rep", "att">;
-def : MnemonicAlias<"repz", "rep", "att">;
-def : MnemonicAlias<"repnz", "repne", "att">;
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sal", "shl", "intel">;
def : MnemonicAlias<"salb", "shlb", "att">;
def : MnemonicAlias<"salw", "shlw", "att">;
def : MnemonicAlias<"sall", "shll", "att">;
@@ -2579,14 +2663,14 @@ def : MnemonicAlias<"fcmova", "fcmovnbe", "att">;
def : MnemonicAlias<"fcmovnae", "fcmovb", "att">;
def : MnemonicAlias<"fcmovna", "fcmovbe", "att">;
def : MnemonicAlias<"fcmovae", "fcmovnb", "att">;
-def : MnemonicAlias<"fcomip", "fcompi", "att">;
+def : MnemonicAlias<"fcomip", "fcompi">;
def : MnemonicAlias<"fildq", "fildll", "att">;
def : MnemonicAlias<"fistpq", "fistpll", "att">;
def : MnemonicAlias<"fisttpq", "fisttpll", "att">;
def : MnemonicAlias<"fldcww", "fldcw", "att">;
def : MnemonicAlias<"fnstcww", "fnstcw", "att">;
def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
-def : MnemonicAlias<"fucomip", "fucompi", "att">;
+def : MnemonicAlias<"fucomip", "fucompi">;
def : MnemonicAlias<"fwait", "wait">;
def : MnemonicAlias<"fxsaveq", "fxsave64", "att">;
@@ -2594,7 +2678,9 @@ def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">;
def : MnemonicAlias<"xsaveq", "xsave64", "att">;
def : MnemonicAlias<"xrstorq", "xrstor64", "att">;
def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">;
-
+def : MnemonicAlias<"xrstorsq", "xrstors64", "att">;
+def : MnemonicAlias<"xsavecq", "xsavec64", "att">;
+def : MnemonicAlias<"xsavesq", "xsaves64", "att">;
class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond,
string VariantName>
@@ -2640,8 +2726,8 @@ defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">;
//===----------------------------------------------------------------------===//
// aad/aam default to base 10 if no operand is specified.
-def : InstAlias<"aad", (AAD8i8 10)>;
-def : InstAlias<"aam", (AAM8i8 10)>;
+def : InstAlias<"aad", (AAD8i8 10)>, Requires<[Not64BitMode]>;
+def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>;
// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
// Likewise for btc/btr/bts.
@@ -2719,8 +2805,10 @@ def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64m i64mem:$src)>;
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
+def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
+def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
@@ -2798,20 +2886,20 @@ def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16Bit
// "imul <imm>, B" is an alias for "imul <imm>, B, B".
-def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
-def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
-def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
-def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
-def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
-def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
+def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
+def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
+def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
+def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
+def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
+def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
// inb %dx -> inb %al, %dx
def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>;
def : InstAlias<"inl\t{%dx|dx}", (IN32rr), 0>;
-def : InstAlias<"inb\t$port", (IN8ri i8imm:$port), 0>;
-def : InstAlias<"inw\t$port", (IN16ri i8imm:$port), 0>;
-def : InstAlias<"inl\t$port", (IN32ri i8imm:$port), 0>;
+def : InstAlias<"inb\t$port", (IN8ri u8imm:$port), 0>;
+def : InstAlias<"inw\t$port", (IN16ri u8imm:$port), 0>;
+def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>;
// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
@@ -2861,9 +2949,9 @@ def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16:
def : InstAlias<"outb\t{%dx|dx}", (OUT8rr), 0>;
def : InstAlias<"outw\t{%dx|dx}", (OUT16rr), 0>;
def : InstAlias<"outl\t{%dx|dx}", (OUT32rr), 0>;
-def : InstAlias<"outb\t$port", (OUT8ir i8imm:$port), 0>;
-def : InstAlias<"outw\t$port", (OUT16ir i8imm:$port), 0>;
-def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port), 0>;
+def : InstAlias<"outb\t$port", (OUT8ir u8imm:$port), 0>;
+def : InstAlias<"outw\t$port", (OUT16ir u8imm:$port), 0>;
+def : InstAlias<"outl\t$port", (OUT32ir u8imm:$port), 0>;
// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
@@ -2940,3 +3028,34 @@ def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}",
def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}",
(XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>;
def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>;
+
+// These aliases exist to get the parser to prioritize matching 8-bit
+// immediate encodings over matching the implicit ax/eax/rax encodings. By
+// explicitly mentioning the A register here, these entries will be ordered
+// first due to the more explicit immediate type.
+def : InstAlias<"adc{w}\t{$imm, %ax|ax, $imm}", (ADC16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"add{w}\t{$imm, %ax|ax, $imm}", (ADD16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"and{w}\t{$imm, %ax|ax, $imm}", (AND16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"cmp{w}\t{$imm, %ax|ax, $imm}", (CMP16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"or{w}\t{$imm, %ax|ax, $imm}", (OR16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"sbb{w}\t{$imm, %ax|ax, $imm}", (SBB16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"sub{w}\t{$imm, %ax|ax, $imm}", (SUB16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"xor{w}\t{$imm, %ax|ax, $imm}", (XOR16ri8 AX, i16i8imm:$imm), 0>;
+
+def : InstAlias<"adc{l}\t{$imm, %eax|eax, $imm}", (ADC32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"add{l}\t{$imm, %eax|eax, $imm}", (ADD32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"and{l}\t{$imm, %eax|eax, $imm}", (AND32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"cmp{l}\t{$imm, %eax|eax, $imm}", (CMP32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"or{l}\t{$imm, %eax|eax, $imm}", (OR32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"sbb{l}\t{$imm, %eax|eax, $imm}", (SBB32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"sub{l}\t{$imm, %eax|eax, $imm}", (SUB32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"xor{l}\t{$imm, %eax|eax, $imm}", (XOR32ri8 EAX, i32i8imm:$imm), 0>;
+
+def : InstAlias<"adc{q}\t{$imm, %rax|rax, $imm}", (ADC64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"add{q}\t{$imm, %rax|rax, $imm}", (ADD64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"and{q}\t{$imm, %rax|rax, $imm}", (AND64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"cmp{q}\t{$imm, %rax|rax, $imm}", (CMP64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"or{q}\t{$imm, %rax|rax, $imm}", (OR64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"sbb{q}\t{$imm, %rax|rax, $imm}", (SBB64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"sub{q}\t{$imm, %rax|rax, $imm}", (SUB64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), 0>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index eaa7894..11dc1e7 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -249,6 +249,7 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
(MMX_X86movd2w (x86mmx VR64:$src)))],
IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>;
+let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (bitconvert GR64:$src))],
@@ -262,7 +263,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst),
// These are 64 bit moves, but since the OS X assembler doesn't
// recognize a register-register movq, we write them as
// movd.
-let SchedRW = [WriteMove] in {
+let SchedRW = [WriteMove], isBitcast = 1 in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -303,7 +304,7 @@ def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
(ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
- (i64 (vector_extract (v2i64 VR128:$src),
+ (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))))))],
IIC_MMX_MOVQ_RR>;
@@ -326,6 +327,7 @@ def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
}
} // SchedRW
+let Predicates = [HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)],
@@ -355,6 +357,7 @@ defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w,
MMX_INTALU_ITINS, 1>;
defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d,
MMX_INTALU_ITINS, 1>;
+let Predicates = [HasSSE2] in
defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q,
MMX_INTALUQ_ITINS, 1>;
defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b,
@@ -382,6 +385,7 @@ defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w,
MMX_INTALU_ITINS>;
defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d,
MMX_INTALU_ITINS>;
+let Predicates = [HasSSE2] in
defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q,
MMX_INTALUQ_ITINS>;
@@ -408,8 +412,10 @@ defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w,
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w,
MMX_PMUL_ITINS, 1>;
+let Predicates = [HasSSE1] in
defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
MMX_PMUL_ITINS, 1>;
+let Predicates = [HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
MMX_PMUL_ITINS, 1>;
let isCommutable = 1 in
@@ -422,6 +428,7 @@ defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw",
int_x86_ssse3_pmadd_ub_sw, MMX_PMUL_ITINS>;
+let Predicates = [HasSSE1] in {
defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b,
MMX_MISC_FUNC_ITINS, 1>;
defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w,
@@ -439,6 +446,7 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w,
defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw,
MMX_PSADBW_ITINS, 1>;
+}
defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b,
MMX_MISC_FUNC_ITINS>;
@@ -594,6 +602,7 @@ let Constraints = "$src1 = $dst" in {
}
// Extract / Insert
+let Predicates = [HasSSE1] in
def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -601,6 +610,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg,
imm:$src2))],
IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
+let Predicates = [HasSSE1] in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
(outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3),
@@ -618,8 +628,10 @@ let Constraints = "$src1 = $dst" in {
imm:$src3))],
IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
+}
// Mask creation
+let Predicates = [HasSSE1] in
def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR64:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
@@ -639,12 +651,12 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))),
// Misc.
let SchedRW = [WriteShuffle] in {
-let Uses = [EDI] in
+let Uses = [EDI], Predicates = [HasSSE1,In32BitMode] in
def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)],
IIC_MMX_MASKMOV>;
-let Uses = [RDI] in
+let Uses = [RDI], Predicates = [HasSSE1,In64BitMode] in
def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
"maskmovq\t{$mask, $src|$src, $mask}",
[(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)],
@@ -653,10 +665,6 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
// 64-bit bit convert.
let Predicates = [HasSSE2] in {
-def : Pat<(x86mmx (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(i64 (bitconvert (x86mmx VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index 99386b0..7a44212 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -330,9 +330,9 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
//===----------------------------------------------------------------------===//
// A vector extract of the first f32/f64 position is a subregister copy
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;
// A 128-bit subvector extract from the first 256-bit vector position
@@ -413,6 +413,8 @@ let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
+ def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>;
+ def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>;
}
// Bitcasts between 256-bit vector types. Return the original type since
@@ -650,10 +652,10 @@ let Predicates = [UseAVX] in {
}
// Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;
@@ -736,7 +738,7 @@ let Predicates = [UseSSE1] in {
}
// Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;
@@ -770,7 +772,7 @@ let Predicates = [UseSSE2] in {
}
// Extract and store.
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;
@@ -935,22 +937,6 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}
-let Predicates = [HasAVX] in {
-def : Pat<(v8i32 (X86vzmovl
- (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-def : Pat<(v4i64 (X86vzmovl
- (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-def : Pat<(v8f32 (X86vzmovl
- (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-def : Pat<(v4f64 (X86vzmovl
- (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
-}
-
-
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
@@ -1172,12 +1158,13 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
string base_opc, InstrItinClass itin> {
- defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ let Predicates = [UseAVX] in
+ defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
itin>, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
+ let Constraints = "$src1 = $dst" in
+ defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $dst|$dst, $src2}",
itin>;
}
@@ -1188,29 +1175,31 @@ let AddedComplexity = 20 in {
}
let SchedRW = [WriteStore] in {
+let Predicates = [UseAVX] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
+ [(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>, VEX;
+}// UseAVX
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
+ [(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>;
} // SchedRW
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
// Shuffle with VMOVLPS
def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
(VMOVLPSrm VR128:$src1, addr:$src2)>;
@@ -1243,7 +1232,7 @@ let Predicates = [HasAVX] in {
let Predicates = [UseSSE1] in {
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
- def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
+ def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)),
(iPTR 0))), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
@@ -1297,31 +1286,33 @@ let AddedComplexity = 20 in {
let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
+let Predicates = [UseAVX] in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+} // UseAVX
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
+ [(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
(iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
} // SchedRW
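
The "unpack high to low, then extract element 0" lowering mentioned above is easy to see with plain SSE2 intrinsics; a minimal sketch (the function name is invented, and this shows the conceptual shape rather than the exact code these patterns produce):

#include <emmintrin.h>
#include <cstdio>

// Extract element 1 of a v2f64 by copying the high lane into the low lane.
static double extractHighF64(__m128d v) {
  __m128d hi = _mm_unpackhi_pd(v, v); // lane 1 of v lands in lane 0 of hi
  return _mm_cvtsd_f64(hi);           // read lane 0
}

int main() {
  __m128d v = _mm_set_pd(42.0, 1.0);      // lane 1 = 42.0, lane 0 = 1.0
  std::printf("%f\n", extractHighF64(v)); // prints 42.000000
  return 0;
}
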
-let Predicates = [HasAVX] in {
+let Predicates = [UseAVX] in {
// VMOVHPS patterns
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
@@ -1345,7 +1336,7 @@ let Predicates = [HasAVX] in {
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(store (f64 (vector_extract
+ def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
@@ -1377,7 +1368,7 @@ let Predicates = [UseSSE2] in {
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(store (f64 (vector_extract
+ def : Pat<(store (f64 (extractelt
(v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
@@ -2073,14 +2064,16 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
(VCVTDQ2PSrm addr:$src)>;
+}
- def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
(VCVTDQ2PSrm addr:$src)>;
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
@@ -2149,7 +2142,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
(VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
@@ -2306,7 +2299,9 @@ let Predicates = [HasAVX] in {
(VCVTDQ2PSYrr VR256:$src)>;
def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
+}
+let Predicates = [HasAVX, NoVLX] in {
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
(VCVTPD2PSrr VR128:$src)>;
@@ -2452,9 +2447,9 @@ let Defs = [EFLAGS] in {
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
"ucomisd">, PD, VEX, VEX_LIG;
let Pattern = []<dag> in {
- defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss">, PS, VEX, VEX_LIG;
- defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd">, PD, VEX, VEX_LIG;
}
@@ -2475,9 +2470,9 @@ let Defs = [EFLAGS] in {
"ucomisd">, PD;
let Pattern = []<dag> in {
- defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
"comiss">, PS;
- defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
"comisd">, PD;
}
@@ -2605,19 +2600,20 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
Sched<[WriteFShuffle]>;
}
-defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+let Predicates = [HasAVX, NoVLX] in {
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f32, SSEPackedSingle>, PS, VEX_4V;
-defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L;
-defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv2f64, SSEPackedDouble>, PD, VEX_4V;
-defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L;
-
+}
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
@@ -2627,7 +2623,7 @@ let Constraints = "$src1 = $dst" in {
memopv2f64, SSEPackedDouble>, PD;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (X86Shufp VR128:$src1,
(bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
@@ -2694,6 +2690,7 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
}
+let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedSingle>, PS, VEX_4V;
@@ -2719,7 +2716,7 @@ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
SSEPackedDouble>, PD, VEX_4V, VEX_L;
-
+}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
@@ -2845,8 +2842,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
ValueType OpVT128, ValueType OpVT256,
- OpndItins itins, bit IsCommutable = 0> {
-let Predicates = [HasAVX, NoVLX] in
+ OpndItins itins, bit IsCommutable = 0, Predicate prd> {
+let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
@@ -2854,7 +2851,7 @@ let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
memopv2i64, i128mem, itins, IsCommutable, 1>;
-let Predicates = [HasAVX2, NoVLX] in
+let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, itins,
IsCommutable, 0>, VEX_4V, VEX_L;
@@ -2863,13 +2860,13 @@ let Predicates = [HasAVX2, NoVLX] in
// These are ordered here for pattern ordering requirements with the fp versions
defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 1>;
+ SSE_VEC_BIT_ITINS_P, 1, NoVLX>;
defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
- SSE_VEC_BIT_ITINS_P, 0>;
+ SSE_VEC_BIT_ITINS_P, 0, NoVLX>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
@@ -2911,7 +2908,7 @@ let isCodeGenOnly = 1 in {
// Multiclass for vectors using the X86 logical operation aliases for FP.
multiclass sse12_fp_packed_vector_logical_alias<
bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- let Predicates = [HasAVX, NoVLX] in {
+ let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V;
@@ -2923,7 +2920,7 @@ multiclass sse12_fp_packed_vector_logical_alias<
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR256, v8f32, f256mem, loadv8f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V, VEX_L;
-
+
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR256, v4f64, f256mem, loadv4f64, SSEPackedDouble, itins, 0>,
PD, VEX_4V, VEX_L;
@@ -3183,7 +3180,7 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE1] in {
// extracted scalar math op with insert via movss
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))))),
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -3198,7 +3195,7 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE41] in {
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (i8 1))),
(!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -3215,7 +3212,7 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX] in {
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
- (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
+ (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
(COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -3241,7 +3238,7 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE2] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
@@ -3256,7 +3253,7 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [UseSSE41] in {
// extracted scalar math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))), (i8 1))),
(!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
@@ -3271,14 +3268,14 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
let Predicates = [HasAVX] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))))),
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
// extracted scalar math op with insert via blend
def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
- (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
+ (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))), (i8 1))),
(!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst,
(COPY_TO_REGCLASS FR64:$src, VR128))>;
@@ -3449,8 +3446,8 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins> {
-let Predicates = [HasAVX] in {
+ OpndItins itins, list<Predicate> prds> {
+let Predicates = prds in {
def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
@@ -3546,16 +3543,16 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
// Square root.
defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX]>,
sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>,
sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>;
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX, NoVLX] >;
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>;
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX, NoVLX]>;
// There is no f64 version of the reciprocal approximation instructions.
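
The refinement mentioned above is typically one Newton-Raphson step on top of the roughly 12-bit rcpss/rsqrtss estimates. A generic sketch of that step (the function names are invented and this is not the exact sequence the backend emits):

#include <cstdio>

// One Newton-Raphson iteration for 1/a and 1/sqrt(a), given an estimate x.
static float refineRcp(float a, float x)   { return x * (2.0f - a * x); }
static float refineRsqrt(float a, float x) { return x * (1.5f - 0.5f * a * x * x); }

int main() {
  float a = 3.0f;
  std::printf("%.7f\n", refineRcp(a, 0.33f));   // ~0.3333000, closer to 1/3
  std::printf("%.7f\n", refineRsqrt(a, 0.58f)); // ~0.5773320, closer to 0.5773503
  return 0;
}
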
@@ -4018,39 +4015,43 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
} // ExeDomain = SSEPackedInt
defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
- SSE_INTALUQ_ITINS_P, 1>;
+ SSE_INTALUQ_ITINS_P, 1, NoVLX>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
- SSE_INTMUL_ITINS_P, 1>;
+ SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
- SSE_INTALUQ_ITINS_P, 0>;
+ SSE_INTALUQ_ITINS_P, 0, NoVLX>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
+defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
// Intrinsic forms
defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
@@ -4067,26 +4068,18 @@ defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
-defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
- int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
-defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
- int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
-defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
- int_x86_avx2_psad_bw, SSE_PMADD, 1>;
-
-let Predicates = [HasAVX2] in
- def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1),
- (v32i8 VR256:$src2))),
- (VPSADBWYrr VR256:$src2, VR256:$src1)>;
let Predicates = [HasAVX] in
- def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
- (v16i8 VR128:$src2))),
- (VPSADBWrr VR128:$src2, VR128:$src1)>;
-
-def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1),
- (v16i8 VR128:$src2))),
- (PSADBWrr VR128:$src2, VR128:$src1)>;
+defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
+ loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ VEX_4V;
+let Predicates = [HasAVX2] in
+defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
+ loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in
+defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
+ memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>;
let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
@@ -4105,9 +4098,6 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX, NoVLX] in {
-defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
@@ -4115,9 +4105,6 @@ defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
@@ -4125,14 +4112,26 @@ defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+} // Predicates = [HasAVX, NoVLX]
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
+
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift],
+    Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
// 128-bit logical shifts.
def VPSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
@@ -4147,13 +4146,9 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
(v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
VEX_4V;
// PSRADQri doesn't exist in SSE[1-3].
-}
-} // Predicates = [HasAVX]
+} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
let Predicates = [HasAVX2, NoVLX] in {
-defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
@@ -4161,9 +4156,6 @@ defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
@@ -4171,14 +4163,25 @@ defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+}// Predicates = [HasAVX2, NoVLX]
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
+}// Predicates = [HasAVX2, NoVLX_Or_NoBWI]
+
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 ,
+ Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// 256-bit logical shifts.
def VPSLLDQYri : PDIi8<0x73, MRM7r,
(outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
@@ -4193,8 +4196,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
(v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
VEX_4V, VEX_L;
// PSRADQYri doesn't exist in SSE[1-3].
-}
-} // Predicates = [HasAVX2]
+} // Predicates = [HasAVX2, NoVLX_Or_NoBWI]
let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
@@ -4247,17 +4249,17 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
//===---------------------------------------------------------------------===//
defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 1>;
+ SSE_INTALU_ITINS_P, 1, NoVLX>;
defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
- SSE_INTALU_ITINS_P, 0>;
+ SSE_INTALU_ITINS_P, 0, NoVLX>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
@@ -4511,40 +4513,43 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in {
+
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, loadv2i64, 0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, loadv2i64, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
- bc_v4i32, loadv2i64, 0>, VEX_4V;
- defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
- bc_v2i64, loadv2i64, 0>, VEX_4V;
-
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, loadv2i64, 0>, VEX_4V;
+}
+let Predicates = [HasAVX, NoVLX] in {
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
+ bc_v4i32, loadv2i64, 0>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, loadv2i64, 0>, VEX_4V;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
bc_v2i64, loadv2i64, 0>, VEX_4V;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
bc_v32i8>, VEX_4V, VEX_L;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
bc_v16i16>, VEX_4V, VEX_L;
- defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
- bc_v8i32>, VEX_4V, VEX_L;
- defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
- bc_v4i64>, VEX_4V, VEX_L;
-
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
bc_v32i8>, VEX_4V, VEX_L;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
bc_v16i16>, VEX_4V, VEX_L;
+}
+let Predicates = [HasAVX2, NoVLX] in {
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
+ bc_v8i32>, VEX_4V, VEX_L;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
+ bc_v4i64>, VEX_4V, VEX_L;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
bc_v8i32>, VEX_4V, VEX_L;
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
@@ -4600,7 +4605,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
}
// Extract
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4615,7 +4620,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
Sched<[WriteShuffleLd, ReadAfterLd]>;
// Insert
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V;
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
@@ -4683,7 +4688,7 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
} // ExeDomain = SSEPackedInt
//===---------------------------------------------------------------------===//
-// SSE2 - Move Doubleword
+// SSE2 - Move Doubleword/Quadword
//===---------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
@@ -4770,23 +4775,23 @@ let isCodeGenOnly = 1 in {
//
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX,
Sched<[WriteMove]>;
def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (vector_extract (v4i32 VR128:$src),
+ [(store (i32 (extractelt (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
VEX, Sched<[WriteStore]>;
def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ [(set GR32:$dst, (extractelt (v4i32 VR128:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
Sched<[WriteMove]>;
def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (vector_extract (v4i32 VR128:$src),
+ [(store (i32 (extractelt (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
@@ -4808,24 +4813,25 @@ def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))],
+ [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
+ (iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
VEX;
def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
+ [(set GR64:$dst, (extractelt (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>;
} //SchedRW
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs i64mem:$dst),
- (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}",
+def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs i64mem:$dst), (ins VR128:$src),
+def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
@@ -4883,30 +4889,18 @@ let isCodeGenOnly = 1 in {
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
}
-//===---------------------------------------------------------------------===//
-// Patterns and instructions to describe movd/movq to XMM register zero-extends
-//
-let isCodeGenOnly = 1, SchedRW = [WriteMove] in {
-let AddedComplexity = 15 in {
-def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}", // X86-64 only
- [(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))],
- IIC_SSE_MOVDQ>,
- VEX, VEX_W;
-def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
- [(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))],
- IIC_SSE_MOVDQ>;
-}
-} // isCodeGenOnly, SchedRW
-
let Predicates = [UseAVX] in {
- let AddedComplexity = 15 in
+ let AddedComplexity = 15 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIrr GR32:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (VMOV64toPQIrr GR64:$src)>;
+
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>;
+ }
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
@@ -4924,16 +4918,16 @@ let Predicates = [UseAVX] in {
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 15 in
+ let AddedComplexity = 15 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(MOVDI2PDIrr GR32:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (MOV64toPQIrr GR64:$src)>;
+ }
let AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
@@ -4985,12 +4979,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {
def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (vector_extract (v2i64 VR128:$src),
+ [(store (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, VEX;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (vector_extract (v2i64 VR128:$src),
+ [(store (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>;
} // ExeDomain, SchedRW
@@ -5119,7 +5113,7 @@ def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
IIC_SSE_MOV_LH>, Sched<[WriteLoad]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
v4f32, VR128, loadv4f32, f128mem>, VEX;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
@@ -5134,7 +5128,7 @@ defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
memopv4f32, f128mem>;
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -5190,21 +5184,30 @@ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (v4f64 (X86Movddup
- (scalar_to_vector (loadf64 addr:$src)))))]>,
+ (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>,
Sched<[WriteLoad]>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
-let Predicates = [HasAVX] in {
+
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+
+ // 256-bit version
+ def : Pat<(X86Movddup (loadv4i64 addr:$src)),
+ (VMOVDDUPYrm addr:$src)>;
+ def : Pat<(X86Movddup (v4i64 VR256:$src)),
+ (VMOVDDUPYrr VR256:$src)>;
+}
+
+let Predicates = [HasAVX] in {
def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
@@ -5212,16 +5215,6 @@ let Predicates = [HasAVX] in {
def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-
- // 256-bit version
- def : Pat<(X86Movddup (loadv4f64 addr:$src)),
- (VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (loadv4i64 addr:$src)),
- (VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
- (VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4i64 VR256:$src)),
- (VMOVDDUPYrr VR256:$src)>;
}
let Predicates = [UseAVX, OptForSize] in {
@@ -5791,37 +5784,37 @@ let Predicates = [HasAVX2] in
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGN : ssse3_palignr<"palignr">;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+ (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
}
let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+ (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>;
}
//===---------------------------------------------------------------------===//
@@ -6145,7 +6138,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
imm:$src2)))), addr:$dst)]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -6170,7 +6163,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
imm:$src2)))), addr:$dst)]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -6194,7 +6187,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
@@ -6217,7 +6210,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, REX_W;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
@@ -6285,7 +6278,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoBWI] in
defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
@@ -6311,7 +6304,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
@@ -6337,7 +6330,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoDQI] in
defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -6543,71 +6536,71 @@ let Predicates = [HasAVX] in {
let Predicates = [UseAVX] in {
def : Pat<(ffloor FR32:$src),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
def : Pat<(f64 (ffloor FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
def : Pat<(f32 (fnearbyint FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
def : Pat<(f64 (fceil FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
def : Pat<(f32 (frint FR32:$src)),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64:$src)),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32:$src)),
- (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
def : Pat<(f64 (ftrunc FR64:$src)),
- (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
}
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (ffloor VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x1))>;
+ (VROUNDPSr VR128:$src, (i32 0x9))>;
def : Pat<(v4f32 (fnearbyint VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x2))>;
+ (VROUNDPSr VR128:$src, (i32 0xA))>;
def : Pat<(v4f32 (frint VR128:$src)),
(VROUNDPSr VR128:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
- (VROUNDPSr VR128:$src, (i32 0x3))>;
+ (VROUNDPSr VR128:$src, (i32 0xB))>;
def : Pat<(v2f64 (ffloor VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x1))>;
+ (VROUNDPDr VR128:$src, (i32 0x9))>;
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x2))>;
+ (VROUNDPDr VR128:$src, (i32 0xA))>;
def : Pat<(v2f64 (frint VR128:$src)),
(VROUNDPDr VR128:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
- (VROUNDPDr VR128:$src, (i32 0x3))>;
+ (VROUNDPDr VR128:$src, (i32 0xB))>;
def : Pat<(v8f32 (ffloor VR256:$src)),
- (VROUNDYPSr VR256:$src, (i32 0x1))>;
+ (VROUNDYPSr VR256:$src, (i32 0x9))>;
def : Pat<(v8f32 (fnearbyint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0xC))>;
def : Pat<(v8f32 (fceil VR256:$src)),
- (VROUNDYPSr VR256:$src, (i32 0x2))>;
+ (VROUNDYPSr VR256:$src, (i32 0xA))>;
def : Pat<(v8f32 (frint VR256:$src)),
(VROUNDYPSr VR256:$src, (i32 0x4))>;
def : Pat<(v8f32 (ftrunc VR256:$src)),
- (VROUNDYPSr VR256:$src, (i32 0x3))>;
+ (VROUNDYPSr VR256:$src, (i32 0xB))>;
def : Pat<(v4f64 (ffloor VR256:$src)),
- (VROUNDYPDr VR256:$src, (i32 0x1))>;
+ (VROUNDYPDr VR256:$src, (i32 0x9))>;
def : Pat<(v4f64 (fnearbyint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0xC))>;
def : Pat<(v4f64 (fceil VR256:$src)),
- (VROUNDYPDr VR256:$src, (i32 0x2))>;
+ (VROUNDYPDr VR256:$src, (i32 0xA))>;
def : Pat<(v4f64 (frint VR256:$src)),
(VROUNDYPDr VR256:$src, (i32 0x4))>;
def : Pat<(v4f64 (ftrunc VR256:$src)),
- (VROUNDYPDr VR256:$src, (i32 0x3))>;
+ (VROUNDYPDr VR256:$src, (i32 0xB))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -6619,47 +6612,47 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
let Predicates = [UseSSE41] in {
def : Pat<(ffloor FR32:$src),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
def : Pat<(f32 (fnearbyint FR32:$src)),
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64:$src)),
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
def : Pat<(f32 (frint FR32:$src)),
(ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64:$src)),
(ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
def : Pat<(f64 (ftrunc FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
def : Pat<(v4f32 (ffloor VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x1))>;
+ (ROUNDPSr VR128:$src, (i32 0x9))>;
def : Pat<(v4f32 (fnearbyint VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0xC))>;
def : Pat<(v4f32 (fceil VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x2))>;
+ (ROUNDPSr VR128:$src, (i32 0xA))>;
def : Pat<(v4f32 (frint VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x4))>;
def : Pat<(v4f32 (ftrunc VR128:$src)),
- (ROUNDPSr VR128:$src, (i32 0x3))>;
+ (ROUNDPSr VR128:$src, (i32 0xB))>;
def : Pat<(v2f64 (ffloor VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x1))>;
+ (ROUNDPDr VR128:$src, (i32 0x9))>;
def : Pat<(v2f64 (fnearbyint VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0xC))>;
def : Pat<(v2f64 (fceil VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x2))>;
+ (ROUNDPDr VR128:$src, (i32 0xA))>;
def : Pat<(v2f64 (frint VR128:$src)),
(ROUNDPDr VR128:$src, (i32 0x4))>;
def : Pat<(v2f64 (ftrunc VR128:$src)),
- (ROUNDPDr VR128:$src, (i32 0x3))>;
+ (ROUNDPDr VR128:$src, (i32 0xB))>;
}
//===----------------------------------------------------------------------===//
@@ -7815,13 +7808,7 @@ def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
// VBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
-class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop, Intrinsic Int, SchedWrite Sched> :
- AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
-
-class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType VT,
PatFrag ld_frag, SchedWrite Sched> :
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
@@ -7832,38 +7819,33 @@ class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
// AVX2 adds register forms
-class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
- Intrinsic Int, SchedWrite Sched> :
+class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ ValueType ResVT, ValueType OpVT, SchedWrite Sched> :
AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
+ [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>,
+ Sched<[Sched]>, VEX;
let ExeDomain = SSEPackedSingle in {
- def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
+ def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
f32mem, v4f32, loadf32, WriteLoad>;
- def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
+ def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
f32mem, v8f32, loadf32,
WriteFShuffleLd>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
+def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem,
v4f64, loadf64, WriteFShuffleLd>, VEX_L;
-def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
- int_x86_avx_vbroadcastf128_pd_256,
- WriteFShuffleLd>, VEX_L;
let ExeDomain = SSEPackedSingle in {
- def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
- int_x86_avx2_vbroadcast_ss_ps,
- WriteFShuffle>;
- def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
- int_x86_avx2_vbroadcast_ss_ps_256,
- WriteFShuffle256>, VEX_L;
+ def VBROADCASTSSrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR128,
+ v4f32, v4f32, WriteFShuffle>;
+ def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256,
+ v8f32, v4f32, WriteFShuffle256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
- int_x86_avx2_vbroadcast_sd_pd_256,
- WriteFShuffle256>, VEX_L;
+def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
+ v4f64, v2f64, WriteFShuffle256>, VEX_L;
let mayLoad = 1, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
@@ -7871,6 +7853,13 @@ def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteLoad]>, VEX, VEX_L;
+def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
+ (ins f128mem:$src),
+ "vbroadcastf128\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_vbroadcastf128_pd_256 addr:$src))]>,
+ Sched<[WriteFShuffleLd]>, VEX, VEX_L;
+
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
@@ -7891,7 +7880,7 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
(iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
@@ -8080,17 +8069,19 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
(bitconvert (i_frag addr:$src2))))]>, VEX_4V,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
- def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ let Predicates = [HasAVX, NoVLX] in {
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffle]>;
- def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
Sched<[WriteFShuffleLd]>;
+ }// Predicates = [HasAVX, NoVLX]
}
let ExeDomain = SSEPackedSingle in {
@@ -8106,7 +8097,7 @@ let ExeDomain = SSEPackedDouble in {
loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
(VPERMILPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
@@ -8245,11 +8236,11 @@ let Predicates = [HasF16C] in {
def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
(VCVTPH2PSrm addr:$src)>;
- def : Pat<(store (f64 (vector_extract (bc_v2f64 (v8i16
+ def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
addr:$dst),
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
- def : Pat<(store (i64 (vector_extract (bc_v2i64 (v8i16
+ def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
addr:$dst),
(VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>;
@@ -8309,97 +8300,62 @@ defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
//
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
- Intrinsic Int128, Intrinsic Int256> {
- def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ ValueType OpVT128, ValueType OpVT256, Predicate prd> {
+ let Predicates = [HasAVX2, prd] in {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (Int128 VR128:$src))]>,
+ [(set VR128:$dst,
+ (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
Sched<[WriteShuffle]>, VEX;
- def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (Int128 (scalar_to_vector (ld_frag addr:$src))))]>,
+ (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
Sched<[WriteLoad]>, VEX;
- def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (Int256 VR128:$src))]>,
+ [(set VR256:$dst,
+ (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
- def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
- (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
+ (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
Sched<[WriteLoad]>, VEX, VEX_L;
+
+ // Provide aliases for broadcast from the same register class that
+ // automatically does the extract.
+ def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
+ (!cast<Instruction>(NAME#"Yrr")
+ (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>;
+ }
}
defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
- int_x86_avx2_pbroadcastb_128,
- int_x86_avx2_pbroadcastb_256>;
+ v16i8, v32i8, NoVLX_Or_NoBWI>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
- int_x86_avx2_pbroadcastw_128,
- int_x86_avx2_pbroadcastw_256>;
+ v8i16, v16i16, NoVLX_Or_NoBWI>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
- int_x86_avx2_pbroadcastd_128,
- int_x86_avx2_pbroadcastd_256>;
+ v4i32, v8i32, NoVLX>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
- int_x86_avx2_pbroadcastq_128,
- int_x86_avx2_pbroadcastq_256>;
+ v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2] in {
- def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
- (VPBROADCASTBrm addr:$src)>;
- def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
- (VPBROADCASTBYrm addr:$src)>;
- def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWrm addr:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
- (VPBROADCASTWYrm addr:$src)>;
- def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDrm addr:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VPBROADCASTDYrm addr:$src)>;
- def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
- (VPBROADCASTQrm addr:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VPBROADCASTQYrm addr:$src)>;
-
- def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
- (VPBROADCASTBrr VR128:$src)>;
- def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
- (VPBROADCASTBYrr VR128:$src)>;
- def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
- (VPBROADCASTWrr VR128:$src)>;
- def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
- (VPBROADCASTWYrr VR128:$src)>;
- def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
- (VPBROADCASTDrr VR128:$src)>;
- def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
- (VPBROADCASTDYrr VR128:$src)>;
- def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
- (VPBROADCASTQrr VR128:$src)>;
- def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
- (VPBROADCASTQYrr VR128:$src)>;
- def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
- (VBROADCASTSSrr VR128:$src)>;
- def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
- (VBROADCASTSSYrr VR128:$src)>;
- def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
- (VPBROADCASTQrr VR128:$src)>;
- def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
- (VBROADCASTSDYrr VR128:$src)>;
+ // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
+ // This means we'll encounter truncated i32 loads; match that here.
+ def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWYrm addr:$src)>;
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
- def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
- (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
- (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
- (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
- (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
- sub_xmm)))>;
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
(VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
sub_xmm)))>;
@@ -8598,7 +8554,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
[]>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
@@ -8722,16 +8678,16 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskstore_q,
int_x86_avx2_maskstore_q_256>, VEX_W;
-def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
(VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
(VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
(VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;
-def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
(VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;
def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
@@ -8776,10 +8732,10 @@ def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0)
(VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
VR128:$mask)>;
-def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
(VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
-def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
+def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
(VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
@@ -8804,10 +8760,10 @@ def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0)
(VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
VR256:$mask)>;
-def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
(VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;
-def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
+def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
(VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;
def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
@@ -8865,12 +8821,13 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
}
-defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
-defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
-defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
-defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
-defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
-
+let Predicates = [HasAVX2, NoVLX] in {
+ defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
+ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
+ defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
+ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
+ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
+}
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
@@ -8905,3 +8862,59 @@ let mayLoad = 1, Constraints
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
}
}
+
+//===----------------------------------------------------------------------===//
+// Extra selection patterns for FR128, f128, f128mem
+
+// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
+def : Pat<(store (f128 FR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;
+
+def : Pat<(loadf128 addr:$src),
+ (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>;
+
+// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
+def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)),
+ (COPY_TO_REGCLASS
+ (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
+ FR128)>;
+
+def : Pat<(X86fand FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(and FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)),
+ (COPY_TO_REGCLASS
+ (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
+ FR128)>;
+
+def : Pat<(X86for FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(or FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)),
+ (COPY_TO_REGCLASS
+ (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
+ FR128)>;
+
+def : Pat<(X86fxor FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
+
+def : Pat<(xor FR128:$src1, FR128:$src2),
+ (COPY_TO_REGCLASS
+ (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
+ (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index caecf70..c1df978 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -31,21 +31,21 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>;
} // Uses = [CL]
-def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
-def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>,
OpSize16;
-def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"shl{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>,
OpSize32;
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -85,19 +85,19 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t{%cl, $dst|$dst, cl}",
[(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
-def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
+def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
+def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, u8imm:$src),
"shl{w}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
+def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, u8imm:$src),
"shl{l}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
+def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, u8imm:$src),
"shl{q}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -137,18 +137,18 @@ def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>;
}
-def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$src2),
"shr{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"shr{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"shr{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
-def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
+def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$src2),
"shr{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>;
@@ -185,19 +185,19 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t{%cl, $dst|$dst, cl}",
[(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
-def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
+def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
+def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, u8imm:$src),
"shr{w}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
+def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, u8imm:$src),
"shr{l}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
+def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, u8imm:$src),
"shr{q}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -241,20 +241,20 @@ def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
IIC_SR>;
}
-def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"sar{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))],
IIC_SR>;
-def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"sar{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"sar{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"sar{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -298,19 +298,19 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
[(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
}
-def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
+def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
+def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, u8imm:$src),
"sar{w}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
+def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, u8imm:$src),
"sar{l}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
+def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, u8imm:$src),
"sar{q}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -342,7 +342,7 @@ let hasSideEffects = 0 in {
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
@@ -350,7 +350,7 @@ def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
let Uses = [CL] in
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
@@ -358,7 +358,7 @@ def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
let Uses = [CL] in
def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
@@ -367,7 +367,7 @@ def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
"rcl{q}\t$dst", [], IIC_SR>;
-def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
@@ -376,7 +376,7 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t$dst", [], IIC_SR>;
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
@@ -384,7 +384,7 @@ def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
let Uses = [CL] in
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
@@ -392,7 +392,7 @@ def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
let Uses = [CL] in
def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
@@ -400,7 +400,7 @@ def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
"rcr{q}\t$dst", [], IIC_SR>;
-def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
+def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
@@ -411,36 +411,36 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
let SchedRW = [WriteShiftLd, WriteRMW] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", [], IIC_SR>;
-def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, u8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
"rcl{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, u8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
"rcl{q}\t$dst", [], IIC_SR>;
-def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
"rcr{b}\t$dst", [], IIC_SR>;
-def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
+def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, u8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
-def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
+def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, u8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
"rcr{l}\t$dst", [], IIC_SR>, OpSize32;
-def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
+def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, u8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t$dst", [], IIC_SR>;
-def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
+def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in {
@@ -482,19 +482,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>;
}
-def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"rol{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"rol{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"rol{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -537,19 +537,19 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
[(store (rotl (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
}
-def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
+def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>;
-def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
+def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, u8imm:$src1),
"rol{w}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>, OpSize16;
-def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
+def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, u8imm:$src1),
"rol{l}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>, OpSize32;
-def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
+def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, u8imm:$src1),
"rol{q}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)],
IIC_SR>;
@@ -589,19 +589,19 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
[(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>;
}
-def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
+def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"ror{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
-def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
+def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"ror{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize16;
-def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
+def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"ror{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
- (ins GR64:$src1, i8imm:$src2),
+ (ins GR64:$src1, u8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
IIC_SR>;
@@ -644,19 +644,19 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
[(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
}
-def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
+def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
-def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
+def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, u8imm:$src),
"ror{w}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize16;
-def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
+def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, u8imm:$src),
"ror{l}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>, OpSize32;
-def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
+def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
"ror{q}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
IIC_SR>;
@@ -727,42 +727,42 @@ def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
let isCommutable = 1 in { // These instructions commute to each other.
def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ (ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
(outs GR16:$dst),
- (ins GR16:$src1, GR16:$src2, i8imm:$src3),
+ (ins GR16:$src1, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
(outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ (ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
(outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2, i8imm:$src3),
+ (ins GR32:$src1, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
(outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ (ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2, i8imm:$src3),
+ (ins GR64:$src1, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
@@ -801,14 +801,14 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
}
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD16_MEM_IM>,
TB, OpSize16;
def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
+ (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)],
@@ -816,14 +816,14 @@ def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
TB, OpSize16;
def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD32_MEM_IM>,
TB, OpSize32;
def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
- (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
+ (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
(i8 imm:$src3)), addr:$dst)],
@@ -831,14 +831,14 @@ def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
TB, OpSize32;
def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD64_MEM_IM>,
TB;
def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
- (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
+ (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
(i8 imm:$src3)), addr:$dst)],
@@ -860,12 +860,12 @@ def ROT64L2R_imm8 : SDNodeXForm<imm, [{
multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
- def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
+ def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TAXD, VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, i8imm:$src2),
+ (ins x86memop:$src1, u8imm:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TAXD, VEX, Sched<[WriteShiftLd]>;
}
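These defs switch the rotate and double-shift counts from i8imm to u8imm, i.e. the count is handled as an unsigned 8-bit immediate. A minimal C++ sketch of the immediate-count form these definitions describe, assuming GNU inline assembly (AT&T syntax) on an x86-64 target; it is illustrative only and not part of this change:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t v = 0x01020304u;
    // ROL32ri: rotate-left with an 8-bit immediate count (opcode C1 /0 ib).
    asm("roll $8, %0" : "+r"(v));
    std::printf("0x%08x\n", v);   // expected 0x02030401
    return 0;
  }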
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 0350566..85e17f5 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -44,7 +44,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
let SchedRW = [WriteSystem] in {
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)], IIC_INT>;
@@ -60,12 +60,6 @@ def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [],
IIC_SYS_ENTER_EXIT>, TB;
def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", [],
IIC_SYS_ENTER_EXIT>, TB, Requires<[In64BitMode]>;
-
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize16;
-def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>,
- OpSize32;
-def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>,
- Requires<[In64BitMode]>;
} // SchedRW
def : Pat<(debugtrap),
@@ -88,13 +82,13 @@ def IN32rr : I<0xED, RawFrm, (outs), (ins),
"in{l}\t{%dx, %eax|eax, dx}", [], IIC_IN_RR>, OpSize32;
let Defs = [AL] in
-def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port),
+def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port),
"in{b}\t{$port, %al|al, $port}", [], IIC_IN_RI>;
let Defs = [AX] in
-def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{w}\t{$port, %ax|ax, $port}", [], IIC_IN_RI>, OpSize16;
let Defs = [EAX] in
-def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port),
+def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port),
"in{l}\t{$port, %eax|eax, $port}", [], IIC_IN_RI>, OpSize32;
let Uses = [DX, AL] in
@@ -108,13 +102,13 @@ def OUT32rr : I<0xEF, RawFrm, (outs), (ins),
"out{l}\t{%eax, %dx|dx, eax}", [], IIC_OUT_RR>, OpSize32;
let Uses = [AL] in
-def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port),
+def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port),
"out{b}\t{%al, $port|$port, al}", [], IIC_OUT_IR>;
let Uses = [AX] in
-def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{w}\t{%ax, $port|$port, ax}", [], IIC_OUT_IR>, OpSize16;
let Uses = [EAX] in
-def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port),
+def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port),
"out{l}\t{%eax, $port|$port, eax}", [], IIC_OUT_IR>, OpSize32;
} // SchedRW
@@ -478,39 +472,60 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB;
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
+let Predicates = [HasXSAVE] in {
let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+}
-let Uses = [RDX, RAX] in {
- def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsave\t$dst", []>, TB;
- def XSAVE64 : RI<0xAE, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsave64\t$dst", []>, TB, Requires<[In64BitMode]>;
+let Uses = [EDX, EAX] in {
+let Predicates = [HasXSAVE] in {
+ def XSAVE : I<0xAE, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsave\t$dst",
+ [(int_x86_xsave addr:$dst, EDX, EAX)]>, TB;
+ def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsave64\t$dst",
+ [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
- "xrstor\t$dst", []>, TB;
+ "xrstor\t$dst",
+ [(int_x86_xrstor addr:$dst, EDX, EAX)]>, TB;
def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst),
- "xrstor64\t$dst", []>, TB, Requires<[In64BitMode]>;
- def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
- "xsaveopt\t$dst", []>, PS;
- def XSAVEOPT64 : RI<0xAE, MRM6m, (outs opaque512mem:$dst), (ins),
- "xsaveopt64\t$dst", []>, PS, Requires<[In64BitMode]>;
-
+ "xrstor64\t$dst",
+ [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
+}
+let Predicates = [HasXSAVEOPT] in {
+ def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst),
+ "xsaveopt\t$dst",
+ [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB;
+ def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst),
+ "xsaveopt64\t$dst",
+ [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
+}
+let Predicates = [HasXSAVEC] in {
+ def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsavec\t$dst",
+ [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB;
+ def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaque512mem:$dst),
+ "xsavec64\t$dst",
+ [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
+}
+let Predicates = [HasXSAVES] in {
+ def XSAVES : I<0xC7, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xsaves\t$dst",
+ [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB;
+ def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaque512mem:$dst),
+ "xsaves64\t$dst",
+ [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst),
- "xrstors\t$dst", []>, TB;
+ "xrstors\t$dst",
+ [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB;
def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst),
- "xrstors64\t$dst", []>, TB, Requires<[In64BitMode]>;
- def XSAVEC : I<0xC7, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsavec\t$dst", []>, TB;
- def XSAVEC64 : RI<0xC7, MRM4m, (outs opaque512mem:$dst), (ins),
- "xsavec64\t$dst", []>, TB, Requires<[In64BitMode]>;
- def XSAVES : I<0xC7, MRM5m, (outs opaque512mem:$dst), (ins),
- "xsaves\t$dst", []>, TB;
- def XSAVES64 : RI<0xC7, MRM5m, (outs opaque512mem:$dst), (ins),
- "xsaves64\t$dst", []>, TB, Requires<[In64BitMode]>;
+ "xrstors64\t$dst",
+ [(int_x86_xrstors64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>;
}
+} // Uses
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -534,6 +549,12 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
}
let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB;
+//==-----------------------------------------------------------------------===//
+// PKU - enable protection key
+let Defs = [EAX, EDX], Uses = [ECX] in
+ def RDPKRU : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB;
+let Uses = [EAX, ECX, EDX] in
+ def WRPKRU : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB;
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
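The XSAVE family above now carries ISel patterns for the llvm.x86.xsave* intrinsics (with the save area corrected to an input memory operand), and RDPKRU/WRPKRU are new. A short C++ sketch of how these surface to user code, assuming the <immintrin.h> wrappers and -mxsave -mpku; the 4 KiB buffer size and runtime availability of XSAVE/PKU are assumptions, not guarantees:

  #include <immintrin.h>
  #include <cstdio>

  alignas(64) static unsigned char xsave_area[4096]; // real size comes from CPUID leaf 0xD

  int main() {
    // EDX:EAX hold the requested-feature mask, matching the Uses = [EDX, EAX]
    // constraint above; bit 0 = x87 state, bit 1 = SSE state.
    _xsave(xsave_area, 0x3ULL);    // llvm.x86.xsave  -> XSAVE
    _xrstor(xsave_area, 0x3ULL);   // llvm.x86.xrstor -> XRSTOR

    // PKU: only valid once the OS has enabled CR4.PKE; otherwise these fault.
    unsigned int pkru = _rdpkru_u32();
    _wrpkru(pkru);
    std::printf("pkru=0x%08x\n", pkru);
    return 0;
  }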
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index 8455b8d..4cb2304 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -83,57 +83,64 @@ let ExeDomain = SSEPackedDouble in {
defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64>;
}
-multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128> {
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, XOP_4VOp3;
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
+ XOP_4VOp3, Sched<[WriteVarVecShift]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2))))]>,
- XOP_4V, VEX_W;
+ (vt128 (OpNode (vt128 VR128:$src1),
+ (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
+ XOP_4V, VEX_W, Sched<[WriteVarVecShift, ReadAfterLd]>;
def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int (bitconvert (loadv2i64 addr:$src1)), VR128:$src2))]>,
- XOP_4VOp3;
+ (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
+ (vt128 VR128:$src2))))]>,
+ XOP_4VOp3, Sched<[WriteVarVecShift, ReadAfterLd]>;
}
let ExeDomain = SSEPackedInt in {
- defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
- defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
- defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
- defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
- defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
- defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
- defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
- defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
- defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
- defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
- defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
- defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
+ defm VPROTB : xop3op<0x90, "vprotb", X86vprot, v16i8>;
+ defm VPROTD : xop3op<0x92, "vprotd", X86vprot, v4i32>;
+ defm VPROTQ : xop3op<0x93, "vprotq", X86vprot, v2i64>;
+ defm VPROTW : xop3op<0x91, "vprotw", X86vprot, v8i16>;
+ defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8>;
+ defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32>;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64>;
+ defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16>;
+ defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8>;
+ defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32>;
+ defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64>;
+ defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16>;
}
-multiclass xop3opimm<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128> {
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i8imm:$src2),
+ (ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1, imm:$src2))]>, XOP;
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>, XOP;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i8imm:$src2),
+ (ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int (bitconvert (loadv2i64 addr:$src1)), imm:$src2))]>, XOP;
+ (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), imm:$src2)))]>, XOP;
}
let ExeDomain = SSEPackedInt in {
- defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>;
- defm VPROTQ : xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>;
- defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>;
- defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>;
+ defm VPROTB : xop3opimm<0xC0, "vprotb", X86vproti, v16i8>;
+ defm VPROTD : xop3opimm<0xC2, "vprotd", X86vproti, v4i32>;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vproti, v2i64>;
+ defm VPROTW : xop3opimm<0xC1, "vprotw", X86vproti, v8i16>;
}
// Instruction where second source can be memory, but third must be register
@@ -170,30 +177,34 @@ let ExeDomain = SSEPackedInt in {
}
// Instruction where second source can be memory, third must be imm8
-multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
+multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128> {
let isCommutable = 1 in
def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, i8immZExt3:$cc))]>,
+ [(set VR128:$dst,
+ (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
+ i8immZExt3:$cc)))]>,
XOP_4V;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, XOPCC:$cc),
!strconcat("vpcom${cc}", Suffix,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
- (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- i8immZExt3:$cc))]>, XOP_4V;
+ (vt128 (OpNode (vt128 VR128:$src1),
+ (vt128 (bitconvert (loadv2i64 addr:$src2))),
+ i8immZExt3:$cc)))]>,
+ XOP_4V;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def ri_alt : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ (ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V;
let mayLoad = 1 in
def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V;
@@ -201,14 +212,14 @@ multiclass xopvpcom<bits<8> opc, string Suffix, Intrinsic Int> {
}
let ExeDomain = SSEPackedInt in { // SSE integer instructions
- defm VPCOMB : xopvpcom<0xCC, "b", int_x86_xop_vpcomb>;
- defm VPCOMW : xopvpcom<0xCD, "w", int_x86_xop_vpcomw>;
- defm VPCOMD : xopvpcom<0xCE, "d", int_x86_xop_vpcomd>;
- defm VPCOMQ : xopvpcom<0xCF, "q", int_x86_xop_vpcomq>;
- defm VPCOMUB : xopvpcom<0xEC, "ub", int_x86_xop_vpcomub>;
- defm VPCOMUW : xopvpcom<0xED, "uw", int_x86_xop_vpcomuw>;
- defm VPCOMUD : xopvpcom<0xEE, "ud", int_x86_xop_vpcomud>;
- defm VPCOMUQ : xopvpcom<0xEF, "uq", int_x86_xop_vpcomuq>;
+ defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8>;
+ defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16>;
+ defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32>;
+ defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64>;
+ defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8>;
+ defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16>;
+ defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32>;
+ defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>;
}
// Instruction where either second or third source can be memory
@@ -270,42 +281,52 @@ multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
let ExeDomain = SSEPackedInt in
defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
+let Predicates = [HasXOP] in {
+ def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
+ (X86andnp VR128:$src3, VR128:$src2))),
+ (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+
+ def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
+ (X86andnp VR256:$src3, VR256:$src2))),
+ (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
+}
+
multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
(Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;
def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
(Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,
VEX_W, MemOp4;
def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR128:$dst,
(Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;
def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>, VEX_L;
def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
VEX_W, MemOp4, VEX_L;
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 2c8b95b..dc6d85d 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -18,14 +18,19 @@ namespace llvm {
enum IntrinsicType {
INTR_NO_TYPE,
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
- INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
- CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
- INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
- INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
- VPERM_3OP_MASKZ,
- INTR_TYPE_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
- EXPAND_FROM_MEM, BLEND
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
+ CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, COMI_RM,
+ INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
+ INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
+ INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
+ FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
+ VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
+ INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
+ COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
+ TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
+ EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
+ TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
struct IntrinsicData {
@@ -138,6 +143,42 @@ static const IntrinsicData IntrinsicsWithChain[] = {
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
@@ -209,6 +250,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(avx2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
@@ -241,6 +284,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(avx2_psign_d, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
@@ -274,16 +318,56 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx2_vperm2i128, INTR_TYPE_3OP, X86ISD::VPERM2X128, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmb_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmb_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmb_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmw_128, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmw_256, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_broadcastmw_512, BROADCASTM, X86ISD::VBROADCASTM, 0),
+ X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2b_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2b_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2b_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2d_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2d_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2d_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2q_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2q_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2q_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2w_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2w_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtmask2w_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+ X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
-
+ X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
+ X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
+ X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
@@ -371,6 +455,50 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_sd_pd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_sd_pd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf32x8_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf64x2_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf64x2_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcastf64x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, INTR_TYPE_1OP_MASK,
+ X86ISD::SUBV_BROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti32x8_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti64x2_256, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti64x2_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_broadcasti64x4_512, BRCST_SUBVEC_TO_VEC,
+ X86ISD::SHUF128, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -388,6 +516,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+ X86ISD::FSETCC),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
+ X86ISD::FSETCC),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -415,7 +547,184 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
-
+ X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CONFLICT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTDQ2PD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0), // no rm
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, ISD::SINT_TO_FP), //er
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP_ROUND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP_ROUND, X86ISD::VFPROUND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_EXTEND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_EXTEND, X86ISD::VFPEXT),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_128, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, ISD::SINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
+ ISD::SINT_TO_FP, ISD::SINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
+ ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTUDQ2PD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0), // no rm
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_128, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
+ ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::DBPSADBW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::DBPSADBW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_mask_div_pd_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
X86_INTRINSIC_DATA(avx512_mask_div_pd_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
@@ -452,6 +761,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_128, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_256, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_pd_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_128, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_256, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ps_512, FPCLASS, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_sd, FPCLASSS, X86ISD::VFPCLASSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_fpclass_ss, FPCLASSS, X86ISD::VFPCLASSS, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_pd_128, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_pd_256, INTR_TYPE_1OP_MASK_RM,
@@ -464,6 +781,62 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FGETEXP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_pd_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_128, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_256, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_512, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_128, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_256, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK,
+ ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
@@ -472,10 +845,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
- X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMAX,
- X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMAX, X86ISD::FMAX_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMAX, X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
@@ -484,10 +857,32 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
- X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FMIN,
- X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVDDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVDDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVDDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK,
+ X86ISD::MOVSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK,
+ X86ISD::MOVSS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSHDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSHDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movshdup_512, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSHDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movsldup_128, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSLDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movsldup_256, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSLDUP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_movsldup_512, INTR_TYPE_1OP_MASK,
+ X86ISD::MOVSLDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
@@ -554,6 +949,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_paddus_w_128, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_256, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(avx512_mask_paddus_w_512, INTR_TYPE_2OP_MASK, X86ISD::ADDUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_palignr_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::PALIGNR, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_128, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_256, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
@@ -596,6 +997,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddubs_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
@@ -644,6 +1057,114 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_db_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_dw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_qw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmov_wb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
@@ -700,6 +1221,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrl_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_d, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
@@ -728,16 +1255,98 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_d_512, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_q_128, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_q_256, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckld_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckld_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpckld_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_pd_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_128, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_256, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ps_512, INTR_TYPE_3OP_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_range_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VRANGE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::RNDSCALE, 0),
+ X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::RNDSCALE, 0),
+ X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
@@ -750,6 +1359,38 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_f64x2_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i32x4_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUF128, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_pd_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_pd_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_pd_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_ps_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_ps_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_shuf_ps_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::SHUFP, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
@@ -758,6 +1399,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
X86ISD::FSQRT_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FSQRT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FSQRT_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
@@ -782,9 +1427,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0),
-
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_128, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_256, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK,
+ X86ISD::UNPCKL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_128, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_256, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
+ X86ISD::VALIGN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
+ ISD::FP16_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK_RM,
+ ISD::FP_TO_FP16, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
@@ -821,7 +1511,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
@@ -852,54 +1541,56 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_128, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_256, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_512, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_128, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_256, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_512, INTR_TYPE_2OP_IMM8_MASK,
+ X86ISD::VPERMILPI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK,
+ X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_hi_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_pd_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_ps_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_256, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_q_512, VPERM_3OP_MASK,
@@ -910,7 +1601,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
-
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_512, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_128, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_256, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ,
+ X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
@@ -959,14 +1661,59 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
+ X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
+ X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCP, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
+ X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
@@ -1017,6 +1764,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
@@ -1024,6 +1773,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(sse2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(sse2_pshuf_d, INTR_TYPE_2OP, X86ISD::PSHUFD, 0),
X86_INTRINSIC_DATA(sse2_pshufh_w, INTR_TYPE_2OP, X86ISD::PSHUFHW, 0),
X86_INTRINSIC_DATA(sse2_pshufl_w, INTR_TYPE_2OP, X86ISD::PSHUFLW, 0),
@@ -1066,12 +1816,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0),
X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxdq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxwd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(sse41_pmovsxwq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxbw, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
@@ -1105,7 +1849,31 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(ssse3_psign_b_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
X86_INTRINSIC_DATA(ssse3_psign_d_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
- X86_INTRINSIC_DATA(ssse3_psign_w_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0)
+ X86_INTRINSIC_DATA(ssse3_psign_w_128, INTR_TYPE_2OP, X86ISD::PSIGN, 0),
+ X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpcomq, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vpcomub, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomud, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
+ X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
+ X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotdi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vprotq, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotqi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vprotw, INTR_TYPE_2OP, X86ISD::VPROT, 0),
+ X86_INTRINSIC_DATA(xop_vprotwi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
+ X86_INTRINSIC_DATA(xop_vpshab, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshad, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshaq, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshaw, INTR_TYPE_2OP, X86ISD::VPSHA, 0),
+ X86_INTRINSIC_DATA(xop_vpshlb, INTR_TYPE_2OP, X86ISD::VPSHL, 0),
+ X86_INTRINSIC_DATA(xop_vpshld, INTR_TYPE_2OP, X86ISD::VPSHL, 0),
+ X86_INTRINSIC_DATA(xop_vpshlq, INTR_TYPE_2OP, X86ISD::VPSHL, 0),
+ X86_INTRINSIC_DATA(xop_vpshlw, INTR_TYPE_2OP, X86ISD::VPSHL, 0)
};
/*
@@ -1128,6 +1896,102 @@ static void verifyIntrinsicTables() {
std::is_sorted(std::begin(IntrinsicsWithChain),
std::end(IntrinsicsWithChain)) &&
"Intrinsic data tables should be sorted by Intrinsic ID");
+ assert((std::adjacent_find(std::begin(IntrinsicsWithoutChain),
+ std::end(IntrinsicsWithoutChain)) ==
+ std::end(IntrinsicsWithoutChain)) &&
+ (std::adjacent_find(std::begin(IntrinsicsWithChain),
+ std::end(IntrinsicsWithChain)) ==
+ std::end(IntrinsicsWithChain)) &&
+ "Intrinsic data tables should have unique entries");
+}
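The added asserts lean on a standard-library idiom: std::is_sorted proves the ordering and, once the table is sorted, std::adjacent_find with the default equality proves uniqueness. A minimal standalone sketch of the same check, using a hypothetical IntrinsicEntry type rather than LLVM's IntrinsicData, looks like this:

// Standalone sketch: validate that a lookup table is sorted and has no
// duplicate keys, mirroring the std::is_sorted / std::adjacent_find checks
// above. IntrinsicEntry is a stand-in type for illustration only.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>

struct IntrinsicEntry {
  unsigned Id; // stands in for the intrinsic ID
  const char *Name;
  bool operator<(const IntrinsicEntry &RHS) const { return Id < RHS.Id; }
  bool operator==(const IntrinsicEntry &RHS) const { return Id == RHS.Id; }
};

static const IntrinsicEntry Table[] = {
    {1, "first"}, {2, "second"}, {5, "third"}, {9, "fourth"}};

int main() {
  // Sorted: every adjacent pair must satisfy operator<.
  assert(std::is_sorted(std::begin(Table), std::end(Table)) &&
         "table must be sorted by ID");
  // Unique: adjacent_find returns end() only if no two neighbours compare
  // equal, which for a sorted table means no duplicates at all.
  assert(std::adjacent_find(std::begin(Table), std::end(Table)) ==
             std::end(Table) &&
         "table must have unique IDs");
  std::puts("table is sorted and duplicate-free");
  return 0;
}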
+
+// X86-specific compare constants.
+// They must be kept in sync with avxintrin.h
+#define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
+#define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
+#define _X86_CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
+#define _X86_CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
+#define _X86_CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
+#define _X86_CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
+#define _X86_CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
+#define _X86_CMP_ORD_Q 0x07 /* Ordered (nonsignaling) */
+#define _X86_CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
+#define _X86_CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unord, signaling) */
+#define _X86_CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
+#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling) */
+#define _X86_CMP_NEQ_OQ 0x0c /* Not-equal (ordered, non-signaling) */
+#define _X86_CMP_GE_OS 0x0d /* Greater-than-or-equal (ordered, signaling) */
+#define _X86_CMP_GT_OS 0x0e /* Greater-than (ordered, signaling) */
+#define _X86_CMP_TRUE_UQ 0x0f /* True (unordered, non-signaling) */
+#define _X86_CMP_EQ_OS 0x10 /* Equal (ordered, signaling) */
+#define _X86_CMP_LT_OQ 0x11 /* Less-than (ordered, non-signaling) */
+#define _X86_CMP_LE_OQ 0x12 /* Less-than-or-equal (ordered, non-signaling) */
+#define _X86_CMP_UNORD_S 0x13 /* Unordered (signaling) */
+#define _X86_CMP_NEQ_US 0x14 /* Not-equal (unordered, signaling) */
+#define _X86_CMP_NLT_UQ 0x15 /* Not-less-than (unordered, non-signaling) */
+#define _X86_CMP_NLE_UQ 0x16 /* Not-less-than-or-equal (unord, non-signaling) */
+#define _X86_CMP_ORD_S 0x17 /* Ordered (signaling) */
+#define _X86_CMP_EQ_US 0x18 /* Equal (unordered, signaling) */
+#define _X86_CMP_NGE_UQ 0x19 /* Not-greater-than-or-equal (unord, non-sign) */
+#define _X86_CMP_NGT_UQ 0x1a /* Not-greater-than (unordered, non-signaling) */
+#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling) */
+#define _X86_CMP_NEQ_OS 0x1c /* Not-equal (ordered, signaling) */
+#define _X86_CMP_GE_OQ 0x1d /* Greater-than-or-equal (ordered, non-signaling) */
+#define _X86_CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
+#define _X86_CMP_TRUE_US 0x1f /* True (unordered, signaling) */
+
+/*
+* Get the comparison modifier from the _mm_comi_round_sd/ss intrinsic.
+* Returns the tuple <isOrdered, X86 condition code>.
+*/
+static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
+ ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(imm);
+ unsigned IntImm = CImm->getZExtValue();
+ // On a floating point condition, the flags are set as follows:
+ // ZF PF CF op
+ // 0 | 0 | 0 | X > Y
+ // 0 | 0 | 1 | X < Y
+ // 1 | 0 | 0 | X == Y
+ // 1 | 1 | 1 | unordered
+ switch (IntImm) {
+ default: llvm_unreachable("Invalid floating point compare value for Comi!");
+ case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, nonsignaling)
+ case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling)
+ return std::make_tuple(true, X86::COND_E);
+ case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, non-signaling)
+ case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling)
+ return std::make_tuple(false, X86::COND_E);
+ case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling)
+ case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_B);
+ case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling)
+ case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_B);
+ case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling)
+ case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_BE);
+ case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling)
+ case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_BE);
+ case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling)
+ case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_A);
+ case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered,signaling)
+ case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_A);
+ case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling)
+ case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
+ return std::make_tuple(true, X86::COND_AE);
+ case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling)
+ case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling)
+ return std::make_tuple(false, X86::COND_AE);
+ case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling)
+ case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling)
+ return std::make_tuple(true, X86::COND_NE);
+ case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling)
+ case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling)
+ return std::make_tuple(false, X86::COND_NE);
+ }
}
} // End llvm namespace
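For reference, the mapping performed by TranslateX86ConstCondToX86CC can be exercised in isolation. The sketch below is a simplified model only: translateComiPredicate and the string condition names are invented for illustration, only a handful of predicates are covered, and real code would use X86::CondCode values rather than strings.

// Minimal sketch of the immediate-to-condition mapping used above.
// The signaling variants map to the same condition as their quiet
// counterparts; the boolean reports whether the compare is ordered.
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <utility>

static std::pair<bool, const char *> translateComiPredicate(uint8_t Imm) {
  switch (Imm) {
  case 0x00: case 0x10: return {true,  "COND_E"};   // EQ_OQ  / EQ_OS
  case 0x08: case 0x18: return {false, "COND_E"};   // EQ_UQ  / EQ_US
  case 0x01: case 0x11: return {true,  "COND_B"};   // LT_OS  / LT_OQ
  case 0x0e: case 0x1e: return {true,  "COND_A"};   // GT_OS  / GT_OQ
  case 0x04: case 0x14: return {false, "COND_NE"};  // NEQ_UQ / NEQ_US
  default:
    throw std::invalid_argument("predicate not handled in this sketch");
  }
}

int main() {
  auto R = translateComiPredicate(0x0e); // _CMP_GT_OS
  std::printf("ordered=%d cond=%s\n", R.first, R.second);
  return 0;
}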
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index 3415ced..e186f70 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -92,7 +92,6 @@ namespace llvm {
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
- VecOS.flush();
CurrentShadowSize += Code.size();
if (CurrentShadowSize >= RequiredShadowSize)
InShadow = false; // The shadow is big enough. Stop counting.
@@ -128,7 +127,7 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = MF.getDataLayout();
assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference");
MCSymbol *Sym = nullptr;
@@ -151,7 +150,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
}
if (!Suffix.empty())
- Name += DL->getPrivateGlobalPrefix();
+ Name += DL.getPrivateGlobalPrefix();
unsigned PrefixLen = Name.size();
@@ -159,7 +158,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
const GlobalValue *GV = MO.getGlobal();
AsmPrinter.getNameWithPrefix(Name, GV);
} else if (MO.isSymbol()) {
- Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL);
+ Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
} else if (MO.isMBB()) {
assert(Suffix.empty());
Sym = MO.getMBB()->getSymbol();
@@ -461,6 +460,7 @@ ReSimplify:
// Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
// if one of the registers is extended, but other isn't.
+ case X86::VMOVZPQILo2PQIrr:
case X86::VMOVAPDrr:
case X86::VMOVAPDYrr:
case X86::VMOVAPSrr:
@@ -478,18 +478,19 @@ ReSimplify:
unsigned NewOpc;
switch (OutMI.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
- case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
- case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
- case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
- case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
- case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
- case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
- case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
- case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
- case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
- case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
- case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
+ case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
+ case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
+ case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
+ case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
+ case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
+ case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
+ case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
+ case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
+ case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
+ case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
+ case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
+ case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
+ case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
}
OutMI.setOpcode(NewOpc);
}
@@ -532,6 +533,23 @@ ReSimplify:
break;
}
+ case X86::CLEANUPRET: {
+ // Replace CLEANUPRET with the appropriate RET.
+ OutMI = MCInst();
+ OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
+ break;
+ }
+
+ case X86::CATCHRET: {
+ // Replace CATCHRET with the appropriate RET.
+ const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
+ unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
+ OutMI = MCInst();
+ OutMI.setOpcode(getRetOpcode(Subtarget));
+ OutMI.addOperand(MCOperand::createReg(ReturnReg));
+ break;
+ }
+
// TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
case X86::TAILJMPr:
case X86::TAILJMPd:
@@ -598,17 +616,29 @@ ReSimplify:
case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify;
case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify;
case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify;
+ case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify;
case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify;
+ case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify;
case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify;
+ case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify;
case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify;
+ case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify;
case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify;
+ case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify;
case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify;
+ case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify;
case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify;
+ case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify;
case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify;
+ case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify;
case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify;
+ case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify;
case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify;
+ case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify;
case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify;
+ case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify;
case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify;
+ case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify;
case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify;
case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify;
case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify;
@@ -875,7 +905,10 @@ void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
MCInst LoadMI;
LoadMI.setOpcode(LoadOpcode);
- LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+
+ if (LoadDefRegister != X86::NoRegister)
+ LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+
for (auto I = MI.operands_begin() + LoadOperandsBeginIdx,
E = MI.operands_end();
I != E; ++I)
@@ -1062,6 +1095,18 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86ATTInstPrinter::getRegisterName(Reg));
break;
}
+ case X86::CLEANUPRET: {
+ // Lower these as normal, but add some comments.
+ OutStreamer->AddComment("CLEANUPRET");
+ break;
+ }
+
+ case X86::CATCHRET: {
+ // Lower these as normal, but add some comments.
+ OutStreamer->AddComment("CATCHRET");
+ break;
+ }
+
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
@@ -1095,12 +1140,30 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32)
.addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
+ const X86FrameLowering *FrameLowering =
+ MF->getSubtarget<X86Subtarget>().getFrameLowering();
+ bool hasFP = FrameLowering->hasFP(*MF);
+
+ // TODO: This is needed only if we require precise CFA.
+ bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
+ !OutStreamer->getDwarfFrameInfos().back().End;
+
+ int stackGrowth = -RI->getSlotSize();
+
+ if (HasActiveDwarfFrame && !hasFP) {
+ OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
+ }
+
// Emit the label.
OutStreamer->EmitLabel(PICBase);
// popl $reg
EmitAndCountInstruction(MCInstBuilder(X86::POP32r)
.addReg(MI->getOperand(0).getReg()));
+
+ if (HasActiveDwarfFrame && !hasFP) {
+ OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
+ }
return;
}
@@ -1206,19 +1269,48 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- // Lower PSHUFB and VPERMILP normally but add a comment if we can find
- // a constant shuffle mask. We won't be able to do this at the MC layer
- // because the mask isn't an immediate.
+ // Lower PSHUFB and VPERMILP normally but add a comment if we can find
+ // a constant shuffle mask. We won't be able to do this at the MC layer
+ // because the mask isn't an immediate.
case X86::PSHUFBrm:
case X86::VPSHUFBrm:
- case X86::VPSHUFBYrm: {
+ case X86::VPSHUFBYrm:
+ case X86::VPSHUFBZ128rm:
+ case X86::VPSHUFBZ128rmk:
+ case X86::VPSHUFBZ128rmkz:
+ case X86::VPSHUFBZ256rm:
+ case X86::VPSHUFBZ256rmk:
+ case X86::VPSHUFBZ256rmkz:
+ case X86::VPSHUFBZrm:
+ case X86::VPSHUFBZrmk:
+ case X86::VPSHUFBZrmkz: {
if (!OutStreamer->isVerboseAsm())
break;
- assert(MI->getNumOperands() > 5 &&
- "We should always have at least 5 operands!");
+ unsigned SrcIdx, MaskIdx;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::PSHUFBrm:
+ case X86::VPSHUFBrm:
+ case X86::VPSHUFBYrm:
+ case X86::VPSHUFBZ128rm:
+ case X86::VPSHUFBZ256rm:
+ case X86::VPSHUFBZrm:
+ SrcIdx = 1; MaskIdx = 5; break;
+ case X86::VPSHUFBZ128rmkz:
+ case X86::VPSHUFBZ256rmkz:
+ case X86::VPSHUFBZrmkz:
+ SrcIdx = 2; MaskIdx = 6; break;
+ case X86::VPSHUFBZ128rmk:
+ case X86::VPSHUFBZ256rmk:
+ case X86::VPSHUFBZrmk:
+ SrcIdx = 3; MaskIdx = 7; break;
+ }
+
+ assert(MI->getNumOperands() >= 6 &&
+ "We should always have at least 6 operands!");
const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(1);
- const MachineOperand &MaskOp = MI->getOperand(5);
+ const MachineOperand &SrcOp = MI->getOperand(SrcIdx);
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
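The shuffle comment depends on decoding the constant PSHUFB control vector pulled from the constant pool. As a rough standalone illustration of the underlying instruction semantics (not the decoder LLVM uses), each control byte either zeroes the destination byte (bit 7 set) or selects a source byte within the same 16-byte lane via its low four bits; -1 marks zeroed elements in this sketch.

// Standalone sketch of turning a PSHUFB control vector into a shuffle mask.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<int> decodePshufbMask(const std::vector<uint8_t> &Ctrl) {
  std::vector<int> Mask;
  for (size_t I = 0, E = Ctrl.size(); I != E; ++I) {
    if (Ctrl[I] & 0x80) {
      Mask.push_back(-1);                    // element is zeroed
    } else {
      size_t LaneBase = I & ~size_t(15);     // shuffles never cross a lane
      Mask.push_back(int(LaneBase + (Ctrl[I] & 0x0f)));
    }
  }
  return Mask;
}

int main() {
  std::vector<uint8_t> Ctrl = {3, 2, 1, 0, 0x80, 0x80, 6, 7,
                               8, 9, 10, 11, 12, 13, 14, 15};
  for (int M : decodePshufbMask(Ctrl))
    std::printf("%d ", M);
  std::printf("\n"); // prints: 3 2 1 0 -1 -1 6 7 8 9 10 11 12 13 14 15
  return 0;
}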
@@ -1240,35 +1332,53 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &SrcOp = MI->getOperand(1);
const MachineOperand &MaskOp = MI->getOperand(5);
+ unsigned ElSize;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VPERMILPSrm: case X86::VPERMILPSYrm: ElSize = 32; break;
+ case X86::VPERMILPDrm: case X86::VPERMILPDYrm: ElSize = 64; break;
+ }
+
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
- DecodeVPERMILPMask(C, Mask);
+ DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
}
break;
}
- // For loads from a constant pool to a vector register, print the constant
- // loaded.
- case X86::MOVAPDrm:
- case X86::VMOVAPDrm:
- case X86::VMOVAPDYrm:
- case X86::MOVUPDrm:
- case X86::VMOVUPDrm:
- case X86::VMOVUPDYrm:
- case X86::MOVAPSrm:
- case X86::VMOVAPSrm:
- case X86::VMOVAPSYrm:
- case X86::MOVUPSrm:
- case X86::VMOVUPSrm:
- case X86::VMOVUPSYrm:
- case X86::MOVDQArm:
- case X86::VMOVDQArm:
- case X86::VMOVDQAYrm:
- case X86::MOVDQUrm:
- case X86::VMOVDQUrm:
- case X86::VMOVDQUYrm:
+#define MOV_CASE(Prefix, Suffix) \
+ case X86::Prefix##MOVAPD##Suffix##rm: \
+ case X86::Prefix##MOVAPS##Suffix##rm: \
+ case X86::Prefix##MOVUPD##Suffix##rm: \
+ case X86::Prefix##MOVUPS##Suffix##rm: \
+ case X86::Prefix##MOVDQA##Suffix##rm: \
+ case X86::Prefix##MOVDQU##Suffix##rm:
+
+#define MOV_AVX512_CASE(Suffix) \
+ case X86::VMOVDQA64##Suffix##rm: \
+ case X86::VMOVDQA32##Suffix##rm: \
+ case X86::VMOVDQU64##Suffix##rm: \
+ case X86::VMOVDQU32##Suffix##rm: \
+ case X86::VMOVDQU16##Suffix##rm: \
+ case X86::VMOVDQU8##Suffix##rm: \
+ case X86::VMOVAPS##Suffix##rm: \
+ case X86::VMOVAPD##Suffix##rm: \
+ case X86::VMOVUPS##Suffix##rm: \
+ case X86::VMOVUPD##Suffix##rm:
+
+#define CASE_ALL_MOV_RM() \
+ MOV_CASE(, ) /* SSE */ \
+ MOV_CASE(V, ) /* AVX-128 */ \
+ MOV_CASE(V, Y) /* AVX-256 */ \
+ MOV_AVX512_CASE(Z) \
+ MOV_AVX512_CASE(Z256) \
+ MOV_AVX512_CASE(Z128)
+
+ // For loads from a constant pool to a vector register, print the constant
+ // loaded.
+ CASE_ALL_MOV_RM()
if (!OutStreamer->isVerboseAsm())
break;
if (MI->getNumOperands() > 4)
@@ -1302,7 +1412,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
- CS << CI->getZExtValue();
+ if (CI->getBitWidth() <= 64) {
+ CS << CI->getZExtValue();
+ } else {
+ // Print a multi-word constant as (w0,w1,...).
+ auto Val = CI->getValue();
+ CS << "(";
+ for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
+ if (i > 0)
+ CS << ",";
+ CS << Val.getRawData()[i];
+ }
+ CS << ")";
+ }
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
SmallString<32> Str;
CF->getValueAPF().toString(Str);
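The multi-word printing added above walks the constant's raw 64-bit words. Below is a plain-C++ sketch of the same formatting with an invented Wide128 type standing in for a wider-than-64-bit APInt; the word order matches APInt's least-significant-word-first storage.

// Sketch of "(w0,w1)" formatting for a constant wider than 64 bits.
#include <cstdint>
#include <cstdio>
#include <string>

struct Wide128 {
  uint64_t Words[2]; // Words[0] holds the least significant 64 bits
};

static std::string formatWide(const Wide128 &V) {
  std::string S = "(";
  for (int I = 0; I < 2; ++I) {
    if (I > 0)
      S += ",";
    S += std::to_string(V.Words[I]);
  }
  S += ")";
  return S;
}

int main() {
  Wide128 C = {{0x1ULL, 0x2ULL}}; // the value 2 * 2^64 + 1
  std::printf("%s\n", formatWide(C).c_str()); // prints (1,2)
  return 0;
}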
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
index ac2cdc8c..c9e636f 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- X86MachineFuctionInfo.cpp - X86 machine function info -------------===//
+//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index e6db970..3a7a98d 100644
--- a/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===//
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -84,8 +84,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// of pushes to pass function parameters.
bool HasPushSequences = false;
- /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill
- /// slot for the frame pointer.
+ /// True if the function recovers from an SEH exception, and therefore needs
+ /// to spill and restore the frame pointer.
bool HasSEHFramePtrSave = false;
/// The frame index of a stack object containing the original frame pointer
@@ -100,7 +100,7 @@ private:
public:
X86MachineFunctionInfo() = default;
- explicit X86MachineFunctionInfo(MachineFunction &MF) {};
+ explicit X86MachineFunctionInfo(MachineFunction &MF) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
new file mode 100644
index 0000000..58020d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -0,0 +1,326 @@
+//===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass that performs some optimizations with LEA
+// instructions in order to improve code size.
+// Currently, it does one thing:
+// 1) Address calculations in load and store instructions are replaced by
+// existing LEA def registers where possible.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-optimize-LEAs"
+
+static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden,
+ cl::desc("X86: Enable LEA optimizations."),
+ cl::init(false));
+
+STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+
+namespace {
+class OptimizeLEAPass : public MachineFunctionPass {
+public:
+ OptimizeLEAPass() : MachineFunctionPass(ID) {}
+
+ const char *getPassName() const override { return "X86 LEA Optimize"; }
+
+ /// \brief Loop over all of the basic blocks, replacing address
+ /// calculations in load and store instructions whenever the address has
+ /// already been calculated by an LEA. Also remove redundant LEAs.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ /// \brief Returns the distance between two instructions inside one basic
+ /// block. A negative result means the instructions occur in reverse order.
+ int calcInstrDist(const MachineInstr &First, const MachineInstr &Last);
+
+ /// \brief Choose the best \p LEA instruction from \p List to replace the
+ /// address calculation in the \p MI instruction. Return the address
+ /// displacement and the distance between \p MI and the chosen \p LEA in
+ /// \p AddrDispShift and \p Dist.
+ bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
+ const MachineInstr &MI, MachineInstr *&LEA,
+ int64_t &AddrDispShift, int &Dist);
+
+ /// \brief Returns true if two machine operands are identical and they are
+ /// not physical registers.
+ bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2);
+
+ /// \brief Returns true if the instruction is LEA.
+ bool isLEA(const MachineInstr &MI);
+
+ /// \brief Returns true if two instructions have memory operands that only
+ /// differ by displacement. The numbers of the first memory operands for both
+ /// instructions are specified through \p N1 and \p N2. The address
+ /// displacement is returned through \p AddrDispShift.
+ bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
+ const MachineInstr &MI2, unsigned N2,
+ int64_t &AddrDispShift);
+
+ /// \brief Find all LEA instructions in the basic block.
+ void findLEAs(const MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineInstr *> &List);
+
+ /// \brief Removes redundant address calculations.
+ bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);
+
+ MachineRegisterInfo *MRI;
+ const X86InstrInfo *TII;
+ const X86RegisterInfo *TRI;
+
+ static char ID;
+};
+char OptimizeLEAPass::ID = 0;
+}
+
+FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }
+
+int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
+ const MachineInstr &Last) {
+ const MachineBasicBlock *MBB = First.getParent();
+
+ // Both instructions must be in the same basic block.
+ assert(Last.getParent() == MBB &&
+ "Instructions are in different basic blocks");
+
+ return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) -
+ std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First));
+}
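calcInstrDist boils down to subtracting two std::distance calls from the block start, so the sign alone tells you which instruction comes first. A tiny standalone sketch of the same idea, using a std::list of strings as a stand-in for a basic block:

// Signed "distance" between two positions in a sequence, as in calcInstrDist.
#include <cstdio>
#include <iterator>
#include <list>
#include <string>

int main() {
  std::list<std::string> Block = {"lea", "mov", "add", "store"};
  auto First = std::next(Block.begin(), 2); // "add"
  auto Last = std::next(Block.begin(), 0);  // "lea"
  long Dist = std::distance(Block.begin(), Last) -
              std::distance(Block.begin(), First);
  std::printf("distance = %ld\n", Dist); // prints -2: Last precedes First
  return 0;
}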
+
+// Find the best LEA instruction in List to replace the address recalculation
+// in MI. Such an LEA must meet these requirements:
+// 1) The address calculated by the LEA differs only by the displacement from
+// the address used in MI.
+// 2) The register class of the definition of the LEA is compatible with the
+// register class of the address base register of MI.
+// 3) Displacement of the new memory operand should fit in 1 byte if possible.
+// 4) The LEA should be as close to MI as possible, and prior to it if
+// possible.
+bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
+ const MachineInstr &MI, MachineInstr *&LEA,
+ int64_t &AddrDispShift, int &Dist) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
+ X86II::getOperandBias(Desc);
+
+ LEA = nullptr;
+
+ // Loop over all LEA instructions.
+ for (auto DefMI : List) {
+ int64_t AddrDispShiftTemp = 0;
+
+ // Compare the instructions' memory operands.
+ if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp))
+ continue;
+
+ // Make sure the address displacement fits in 4 bytes.
+ if (!isInt<32>(AddrDispShiftTemp))
+ continue;
+
+ // Check that the LEA def register can be used as the MI address base. Some
+ // instructions can only use a limited set of registers as the address base,
+ // for example MOV8mr_NOREX. We could constrain the register class of the
+ // LEA def to suit MI, but since this case is very rare and hard to
+ // reproduce in a test, it is more reliable to simply skip the LEA.
+ if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) !=
+ MRI->getRegClass(DefMI->getOperand(0).getReg()))
+ continue;
+
+ // Choose the closest LEA instruction from the list, prior to MI if
+ // possible. Note that the resulting address displacement is taken into
+ // account as well. Also note that the list is sorted in the order in which
+ // the LEAs occur, so the break condition is pretty simple.
+ int DistTemp = calcInstrDist(*DefMI, MI);
+ assert(DistTemp != 0 &&
+ "The distance between two different instructions cannot be zero");
+ if (DistTemp > 0 || LEA == nullptr) {
+ // Do not update the returned LEA if the current one provides a
+ // displacement that fits in 1 byte while the new candidate does not.
+ if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) &&
+ isInt<8>(AddrDispShift))
+ continue;
+
+ LEA = DefMI;
+ AddrDispShift = AddrDispShiftTemp;
+ Dist = DistTemp;
+ }
+
+ // FIXME: Maybe we should not always stop at the first LEA after MI.
+ if (DistTemp < 0)
+ break;
+ }
+
+ return LEA != nullptr;
+}
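The selection policy above can be summarised as: walk the candidate LEAs in program order, prefer ones that appear before the memory instruction (positive distance), only trade away an 8-bit displacement for a wider one if forced to, and stop at the first LEA that appears after the instruction. A hedged standalone model of that policy, with an invented Candidate type in place of MachineInstr:

// Sketch of the chooseBestLEA selection policy over precomputed candidates.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Candidate {
  int Dist;          // >0: LEA precedes the load/store, <0: it follows
  int64_t DispShift; // extra displacement the rewrite would need
};

static bool fitsInt8(int64_t V) { return V >= INT8_MIN && V <= INT8_MAX; }

static const Candidate *chooseBest(const std::vector<Candidate> &List) {
  const Candidate *Best = nullptr;
  for (const Candidate &C : List) {
    if (C.Dist > 0 || !Best) {
      // Keep the current pick if it already fits in one byte and the new
      // candidate does not.
      if (!(Best && !fitsInt8(C.DispShift) && fitsInt8(Best->DispShift)))
        Best = &C;
    }
    if (C.Dist < 0) // first LEA past the instruction: stop searching
      break;
  }
  return Best;
}

int main() {
  std::vector<Candidate> List = {{6, 200}, {2, 16}, {-1, 0}};
  const Candidate *Best = chooseBest(List);
  std::printf("dist=%d disp=%lld\n", Best->Dist,
              (long long)Best->DispShift); // picks {2, 16}: close and 8-bit
  return 0;
}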
+
+bool OptimizeLEAPass::isIdenticalOp(const MachineOperand &MO1,
+ const MachineOperand &MO2) {
+ return MO1.isIdenticalTo(MO2) &&
+ (!MO1.isReg() ||
+ !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
+}
+
+bool OptimizeLEAPass::isLEA(const MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ return Opcode == X86::LEA16r || Opcode == X86::LEA32r ||
+ Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
+}
+
+// Check if MI1 and MI2 have memory operands which represent addresses that
+// differ only by displacement.
+bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
+ const MachineInstr &MI2, unsigned N2,
+ int64_t &AddrDispShift) {
+ // Address base, scale, index and segment operands must be identical.
+ static const int IdenticalOpNums[] = {X86::AddrBaseReg, X86::AddrScaleAmt,
+ X86::AddrIndexReg, X86::AddrSegmentReg};
+ for (auto &N : IdenticalOpNums)
+ if (!isIdenticalOp(MI1.getOperand(N1 + N), MI2.getOperand(N2 + N)))
+ return false;
+
+ // Address displacement operands may differ by a constant.
+ const MachineOperand *Op1 = &MI1.getOperand(N1 + X86::AddrDisp);
+ const MachineOperand *Op2 = &MI2.getOperand(N2 + X86::AddrDisp);
+ if (!isIdenticalOp(*Op1, *Op2)) {
+ if (Op1->isImm() && Op2->isImm())
+ AddrDispShift = Op1->getImm() - Op2->getImm();
+ else if (Op1->isGlobal() && Op2->isGlobal() &&
+ Op1->getGlobal() == Op2->getGlobal())
+ AddrDispShift = Op1->getOffset() - Op2->getOffset();
+ else
+ return false;
+ }
+
+ return true;
+}
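In other words, two operands are "similar" when base, scale, index and segment match exactly and only the immediate displacement differs; that difference is exactly what the later rewrite must add back. A simplified standalone model of the check, with MemAddr standing in for the five-operand x86 memory form:

// Sketch of the isSimilarMemOp comparison over a simplified address tuple.
#include <cstdint>
#include <cstdio>

struct MemAddr {
  unsigned BaseReg;
  int64_t ScaleAmt;
  unsigned IndexReg;
  int64_t Disp;
  unsigned SegmentReg;
};

static bool isSimilar(const MemAddr &A, const MemAddr &B,
                      int64_t &AddrDispShift) {
  if (A.BaseReg != B.BaseReg || A.ScaleAmt != B.ScaleAmt ||
      A.IndexReg != B.IndexReg || A.SegmentReg != B.SegmentReg)
    return false;
  AddrDispShift = A.Disp - B.Disp; // only the displacement may differ
  return true;
}

int main() {
  MemAddr Load = {/*Base=*/15, /*Scale=*/1, /*Index=*/0, /*Disp=*/24, /*Seg=*/0};
  MemAddr Lea  = {/*Base=*/15, /*Scale=*/1, /*Index=*/0, /*Disp=*/8,  /*Seg=*/0};
  int64_t Shift = 0;
  if (isSimilar(Load, Lea, Shift))
    std::printf("similar, displacement shift = %lld\n", (long long)Shift); // 16
  return 0;
}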
+
+void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineInstr *> &List) {
+ for (auto &MI : MBB) {
+ if (isLEA(MI))
+ List.push_back(const_cast<MachineInstr *>(&MI));
+ }
+}
+
+// Try to find load and store instructions that recalculate an address already
+// calculated by some LEA, and replace their memory operands with that LEA's
+// def register.
+bool OptimizeLEAPass::removeRedundantAddrCalc(
+ const SmallVectorImpl<MachineInstr *> &List) {
+ bool Changed = false;
+
+ assert(List.size() > 0);
+ MachineBasicBlock *MBB = List[0]->getParent();
+
+ // Process all instructions in basic block.
+ for (auto I = MBB->begin(), E = MBB->end(); I != E;) {
+ MachineInstr &MI = *I++;
+ unsigned Opcode = MI.getOpcode();
+
+ // Instruction must be load or store.
+ if (!MI.mayLoadOrStore())
+ continue;
+
+ // Get the number of the first memory operand.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode);
+
+ // If the instruction has no memory operand, skip it.
+ if (MemOpNo < 0)
+ continue;
+
+ MemOpNo += X86II::getOperandBias(Desc);
+
+ // Get the best LEA instruction to replace address calculation.
+ MachineInstr *DefMI;
+ int64_t AddrDispShift;
+ int Dist;
+ if (!chooseBestLEA(List, MI, DefMI, AddrDispShift, Dist))
+ continue;
+
+ // If the LEA occurs before the current instruction, we can freely replace
+ // the instruction. If the LEA occurs after it, we can hoist the LEA above
+ // the instruction and then replace it. Since the LEA and the instruction
+ // have similar memory operands (and thus the same def instructions for
+ // those operands), we can always do that without worrying about using
+ // registers before their defs.
+ if (Dist < 0) {
+ DefMI->removeFromParent();
+ MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
+ }
+
+ // Since we can possibly extend register lifetime, clear kill flags.
+ MRI->clearKillFlags(DefMI->getOperand(0).getReg());
+
+ ++NumSubstLEAs;
+ DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump(););
+
+ // Change instruction operands.
+ MI.getOperand(MemOpNo + X86::AddrBaseReg)
+ .ChangeToRegister(DefMI->getOperand(0).getReg(), false);
+ MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1);
+ MI.getOperand(MemOpNo + X86::AddrIndexReg)
+ .ChangeToRegister(X86::NoRegister, false);
+ MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift);
+ MI.getOperand(MemOpNo + X86::AddrSegmentReg)
+ .ChangeToRegister(X86::NoRegister, false);
+
+ DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump(););
+
+ Changed = true;
+ }
+
+ return Changed;
+}
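
A sketch of the five-operand rewrite performed above: once a LEA already computes base + scale*index + disp0, the load/store's address becomes simply (LEA def register) + (disp - disp0), with no index, unit scale and no segment override. Types and names below are illustrative only, not the X86 operand encoding.

#include <cstdint>

struct ToyMemOperands {
  unsigned BaseReg, IndexReg, SegmentReg;
  unsigned Scale;
  int64_t Disp;
};

ToyMemOperands rewriteThroughLEA(unsigned LEADefReg, int64_t AddrDispShift) {
  const unsigned NoRegister = 0; // stand-in for X86::NoRegister
  // Base = the LEA's def register, no index, scale 1, residual displacement,
  // no segment override: exactly the shape produced by the rewrite above.
  return {LEADefReg, NoRegister, NoRegister, /*Scale=*/1, AddrDispShift};
}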
+
+bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ // Perform this optimization only if we care about code size.
+ if (!EnableX86LEAOpt || !MF.getFunction()->optForSize())
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
+
+ // Process all basic blocks.
+ for (auto &MBB : MF) {
+ SmallVector<MachineInstr *, 16> LEAs;
+
+ // Find all LEA instructions in basic block.
+ findLEAs(MBB, LEAs);
+
+ // If current basic block has no LEAs, move on to the next one.
+ if (LEAs.empty())
+ continue;
+
+ // Remove redundant address calculations.
+ Changed |= removeRedundantAddrCalc(LEAs);
+ }
+
+ return Changed;
+}
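
Both this pass and the PadShortFunction change just below are gated on MachineFunction::getFunction()->optForSize(), which is set for functions carrying the optsize/minsize IR attributes. A hedged source-level example, with the attribute spelling assumed from clang (building with -Os/-Oz has the same effect):

// Hypothetical function whose IR carries the minsize attribute, so
// optForSize() returns true and the LEA reuse above is allowed to run.
__attribute__((minsize)) long scaledLoad(long *p, long i) {
  return p[4 * i + 3]; // the kind of address an earlier LEA may already compute
}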
diff --git a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 143e70b..0f425e2 100644
--- a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -93,8 +93,7 @@ FunctionPass *llvm::createX86PadShortFunctions() {
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// NOOP instructions before early exits.
bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) ||
- MF.getFunction()->hasFnAttribute(Attribute::MinSize)) {
+ if (MF.getFunction()->optForSize()) {
return false;
}
@@ -107,7 +106,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
// Search through basic blocks and mark the ones that have early returns
ReturnBBs.clear();
VisitedBBs.clear();
- findReturns(MF.begin());
+ findReturns(&MF.front());
bool MadeChange = false;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index d8495e5..5840443 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
@@ -44,12 +43,6 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
-cl::opt<bool>
-ForceStackAlign("force-align-stack",
- cl::desc("Force align the stack to the minimum alignment"
- " needed for the function."),
- cl::init(false), cl::Hidden);
-
static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
@@ -174,21 +167,34 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
if (Subtarget.isTarget64BitLP64())
return &X86::GR64_NOSPRegClass;
return &X86::GR32_NOSPRegClass;
- case 2: // Available for tailcall (not callee-saved GPRs).
- const Function *F = MF.getFunction();
- if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
- return &X86::GR64_TCW64RegClass;
- else if (Is64Bit)
- return &X86::GR64_TCRegClass;
-
- bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
- if (hasHipeCC)
- return &X86::GR32RegClass;
- return &X86::GR32_TCRegClass;
+ case 2: // NOREX GPRs.
+ if (Subtarget.isTarget64BitLP64())
+ return &X86::GR64_NOREXRegClass;
+ return &X86::GR32_NOREXRegClass;
+ case 3: // NOREX GPRs except the stack pointer (for encoding reasons).
+ if (Subtarget.isTarget64BitLP64())
+ return &X86::GR64_NOREX_NOSPRegClass;
+ return &X86::GR32_NOREX_NOSPRegClass;
+ case 4: // Available for tailcall (not callee-saved GPRs).
+ return getGPRsForTailCall(MF);
}
}
const TargetRegisterClass *
+X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+ if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64))
+ return &X86::GR64_TCW64RegClass;
+ else if (Is64Bit)
+ return &X86::GR64_TCRegClass;
+
+ bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
+ if (hasHipeCC)
+ return &X86::GR32RegClass;
+ return &X86::GR32_TCRegClass;
+}
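
The classes returned here contain only registers that are still usable after the epilogue: callee-saved registers are generally restored right before the tail-call jump, so the jump target has to sit in a caller-saved GPR allowed by the convention (Win64, HiPE and plain 32/64-bit each get their own set). A small illustration, with the caveat that whether the call is actually emitted as a tail call depends on optimization level and ABI details:

// At -O2 a call like this is typically lowered to "jmp *%reg"; the register
// holding fp must come from the tail-call class chosen above.
int dispatch(int (*fp)(int), int x) {
  return fp(x); // candidate for tail-call lowering
}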
+
+const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &X86::CCRRegClass) {
if (Is64Bit)
@@ -222,6 +228,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
+ bool HasSSE = Subtarget.hasSSE1();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
bool CallsEHReturn = MF->getMMI().callsEHReturn();
@@ -241,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_SaveList;
return CSR_64_RT_AllRegs_SaveList;
+ case CallingConv::CXX_FAST_TLS:
+ if (Is64Bit)
+ return CSR_64_TLS_Darwin_SaveList;
+ break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
@@ -254,6 +265,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_Intel_OCL_BI_SaveList;
break;
}
+ case CallingConv::HHVM:
+ return CSR_64_HHVM_SaveList;
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
@@ -264,6 +277,18 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (CallsEHReturn)
return CSR_64EHRet_SaveList;
return CSR_64_SaveList;
+ case CallingConv::X86_INTR:
+ if (Is64Bit) {
+ if (HasAVX)
+ return CSR_64_AllRegs_AVX_SaveList;
+ else
+ return CSR_64_AllRegs_SaveList;
+ } else {
+ if (HasSSE)
+ return CSR_32_AllRegs_SSE_SaveList;
+ else
+ return CSR_32_AllRegs_SaveList;
+ }
default:
break;
}
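
X86_INTR is the calling convention used for x86 interrupt and exception handlers, which can interrupt arbitrary code, so every register the handler might clobber (including vector registers when SSE/AVX is available) must appear in its save list; that is what the CSR_*_AllRegs lists selected above provide. A hedged sketch of source that ends up with this convention, assuming the x86 "interrupt" attribute as documented for GCC/Clang (exact frontend support and frame type are target-dependent):

struct interrupt_frame;

__attribute__((interrupt)) void isr(interrupt_frame *frame) {
  // Anything clobbered here must be saved and restored, hence the all-regs
  // callee-saved lists chosen for CallingConv::X86_INTR.
  (void)frame;
}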
@@ -284,6 +309,7 @@ const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+ bool HasSSE = Subtarget.hasSSE1();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
@@ -301,6 +327,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_RegMask;
return CSR_64_RT_AllRegs_RegMask;
+ case CallingConv::CXX_FAST_TLS:
+ if (Is64Bit)
+ return CSR_64_TLS_Darwin_RegMask;
+ break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
@@ -314,16 +344,30 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_Intel_OCL_BI_RegMask;
break;
}
+ case CallingConv::HHVM:
+ return CSR_64_HHVM_RegMask;
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_RegMask;
break;
- default:
- break;
case CallingConv::X86_64_Win64:
return CSR_Win64_RegMask;
case CallingConv::X86_64_SysV:
return CSR_64_RegMask;
+ case CallingConv::X86_INTR:
+ if (Is64Bit) {
+ if (HasAVX)
+ return CSR_64_AllRegs_AVX_RegMask;
+ else
+ return CSR_64_AllRegs_RegMask;
+ } else {
+ if (HasSSE)
+ return CSR_32_AllRegs_SSE_RegMask;
+ else
+ return CSR_32_AllRegs_RegMask;
+ }
+ default:
+ break;
}
// Unlike getCalleeSavedRegs(), we don't have MMI so we can't check
@@ -341,6 +385,10 @@ X86RegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
+const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const {
+ return CSR_64_TLS_Darwin_RegMask;
+}
+
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const X86FrameLowering *TFI = getFrameLowering(MF);
@@ -371,8 +419,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
"Stack realignment in presence of dynamic allocas is not supported with"
"this calling convention.");
- unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64,
- false);
+ unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64);
for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true);
I.isValid(); ++I)
Reserved.set(*I);
@@ -439,6 +486,10 @@ void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
+static bool CantUseSP(const MachineFrameInfo *MFI) {
+ return MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
+}
+
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -451,13 +502,11 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// reference locals while also adjusting the stack pointer. When we can't
// use both the SP and the FP, we need a separate base pointer register.
bool CantUseFP = needsStackRealignment(MF);
- bool CantUseSP =
- MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
- return CantUseFP && CantUseSP;
+ return CantUseFP && CantUseSP(MFI);
}
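
In other words, a base pointer is reserved only when the frame pointer is tied up by stack realignment and, at the same time, SP-relative addressing is unreliable because the stack pointer moves at run time (variable-sized objects or opaque SP adjustments). A sketch of source that typically hits both conditions, assuming the clang/GCC alloca builtin and C++ alignas as the triggers:

#include <cstddef>

void consume(void *a, void *b);

void needsBasePointer(std::size_t n) {
  alignas(64) char aligned[128];       // over-aligned local: forces realignment
  void *dynamic = __builtin_alloca(n); // variable-sized object: SP moves at run time
  consume(aligned, dynamic);           // with FP and SP both unusable for locals,
                                       // the backend falls back to a base register
}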
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
- if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ if (!TargetRegisterInfo::canRealignStack(MF))
return false;
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -470,26 +519,11 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
// If a base pointer is necessary. Check that it isn't too late to reserve
// it.
- if (MFI->hasVarSizedObjects())
+ if (CantUseSP(MFI))
return MRI->canReserveReg(BasePtr);
return true;
}
-bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86FrameLowering *TFI = getFrameLowering(MF);
- const Function *F = MF.getFunction();
- unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttribute(Attribute::StackAlignment));
-
- // If we've requested that we force align the stack do so now.
- if (ForceStackAlign)
- return canRealignStack(MF);
-
- return requiresRealignment && canRealignStack(MF);
-}
-
bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
unsigned Reg, int &FrameIdx) const {
// Since X86 defines assignCalleeSavedSpillSlots which always return true
@@ -510,6 +544,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Opc = MI.getOpcode();
bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm ||
Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64;
+
if (hasBasePointer(MF))
BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister());
else if (needsStackRealignment(MF))
@@ -524,14 +559,11 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// offset is from the traditional base pointer location. On 64-bit, the
// offset is from the SP at the end of the prologue, not the FP location. This
// matches the behavior of llvm.frameaddress.
+ unsigned IgnoredFrameReg;
if (Opc == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
- bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int Offset;
- if (IsWinEH)
- Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex);
- else
- Offset = TFI->getFrameIndexOffset(MF, FrameIndex);
+ Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
FI.ChangeToImmediate(Offset);
return;
}
@@ -540,7 +572,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register as source operand, semantic is the same and destination is
// 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
- BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false);
+ BasePtr = getX86SubSuperRegister(BasePtr, 64);
// This must be part of a four operand memory reference. Replace the
// FrameIndex with base register with EBP. Add an offset to the offset.
@@ -553,7 +585,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const MachineFrameInfo *MFI = MF.getFrameInfo();
FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
} else
- FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex);
+ FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
if (BasePtr == StackPtr)
FIOffset += SPAdj;
@@ -592,193 +624,11 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
unsigned FrameReg = getFrameRegister(MF);
if (Subtarget.isTarget64BitILP32())
- FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false);
+ FrameReg = getX86SubSuperRegister(FrameReg, 32);
return FrameReg;
}
-namespace llvm {
-unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT,
- bool High) {
- switch (VT) {
- default: return 0;
- case MVT::i8:
- if (High) {
- switch (Reg) {
- default: return getX86SubSuperRegister(Reg, MVT::i64);
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AH;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DH;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CH;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BH;
- }
- } else {
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AL;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DL;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CL;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BL;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SIL;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DIL;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BPL;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SPL;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8B;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9B;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10B;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11B;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12B;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13B;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14B;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15B;
- }
- }
- case MVT::i16:
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::AX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::DX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::CX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::BX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8W;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9W;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10W;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11W;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12W;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13W;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14W;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15W;
- }
- case MVT::i32:
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::EAX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::EDX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::ECX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::EBX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::ESI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::EDI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::EBP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::ESP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8D;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9D;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10D;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11D;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12D;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13D;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14D;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15D;
- }
- case MVT::i64:
- switch (Reg) {
- default: return 0;
- case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
- return X86::RAX;
- case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
- return X86::RDX;
- case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
- return X86::RCX;
- case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
- return X86::RBX;
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::RSI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::RDI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::RBP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::RSP;
- case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
- return X86::R8;
- case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
- return X86::R9;
- case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
- return X86::R10;
- case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
- return X86::R11;
- case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
- return X86::R12;
- case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
- return X86::R13;
- case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
- return X86::R14;
- case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
- return X86::R15;
- }
- }
-}
-
-unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT,
- bool High) {
- unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High);
- if (Res == 0)
- llvm_unreachable("Unexpected register or VT");
- return Res;
-}
-
-unsigned get512BitSuperRegister(unsigned Reg) {
+unsigned llvm::get512BitSuperRegister(unsigned Reg) {
if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
return X86::ZMM0 + (Reg - X86::XMM0);
if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
@@ -787,5 +637,3 @@ unsigned get512BitSuperRegister(unsigned Reg) {
return Reg;
llvm_unreachable("Unexpected SIMD register");
}
-
-}
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 8de1d0b..f014c8f 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -87,6 +87,11 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
+ /// getGPRsForTailCall - Returns a register class with registers that can be
+ /// used in forming tail calls.
+ const TargetRegisterClass *
+ getGPRsForTailCall(const MachineFunction &MF) const;
+
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
@@ -96,7 +101,11 @@ public:
getCalleeSavedRegs(const MachineFunction* MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
- const uint32_t *getNoPreservedMask() const;
+ const uint32_t *getNoPreservedMask() const override;
+
+ // Calls involved in thread-local variable lookup save more registers than
+ // normal calls, so they need a different mask to represent this.
+ const uint32_t *getDarwinTLSCallPreservedMask() const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
@@ -108,9 +117,7 @@ public:
bool hasBasePointer(const MachineFunction &MF) const;
- bool canRealignStack(const MachineFunction &MF) const;
-
- bool needsStackRealignment(const MachineFunction &MF) const override;
+ bool canRealignStack(const MachineFunction &MF) const override;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const override;
@@ -128,16 +135,6 @@ public:
unsigned getSlotSize() const { return SlotSize; }
};
-/// Returns the sub or super register of a specific X86 register.
-/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX.
-/// Aborts on error.
-unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);
-
-/// Returns the sub or super register of a specific X86 register.
-/// Like getX86SubSuperRegister() but returns 0 on error.
-unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType,
- bool High = false);
-
// get512BitSuperRegister - X86 utility - returns 512-bit super register
unsigned get512BitSuperRegister(unsigned Reg);
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index cdb151c..56f0d93 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -225,15 +225,15 @@ let SubRegIndices = [sub_ymm] in {
}
}
- // Mask Registers, used by AVX-512 instructions.
- def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>;
- def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>;
- def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>;
- def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>;
- def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>;
- def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>;
- def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
- def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;
+// Mask Registers, used by AVX-512 instructions.
+def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>;
+def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>;
+def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>;
+def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>;
+def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>;
+def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>;
+def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
+def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;
// Floating point stack registers. These don't map one-to-one to the FP
// pseudo registers, but we still mark them as aliasing FP registers. That
@@ -375,7 +375,7 @@ def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>;
def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
R8, R9, R11, RIP)>;
def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
- R8, R9, R11)>;
+ R8, R9, R10, R11, RIP)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
@@ -423,6 +423,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
+def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>;
+
// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
@@ -442,10 +444,11 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
}
// Generic vector registers: VR64 and VR128.
+// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
-def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
128, (add FR32)>;
-def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Status flags registers.
@@ -459,8 +462,8 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> {
}
// AVX-512 vector/mask registers.
-def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512,
- (sequence "ZMM%u", 0, 31)>;
+def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
+ 512, (sequence "ZMM%u", 0, 31)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
@@ -468,10 +471,10 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
// Extended VR128 and VR256 for AVX-512 instructions
-def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (add FR32X)>;
-def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- 256, (sequence "YMM%u", 0, 31)>;
+def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
+ 128, (add FR32X)>;
+def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+ 256, (sequence "YMM%u", 0, 31)>;
// Mask registers
def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;}
@@ -491,4 +494,4 @@ def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
// Bound registers
-def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; \ No newline at end of file
+def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index ce79fcf..b1a0161 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -44,13 +44,10 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
return false;
}
-SDValue
-X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- MachinePointerInfo DstPtrInfo) const {
+SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
const X86Subtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<X86Subtarget>();
@@ -74,10 +71,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
- if (const char *bzeroEntry = V &&
+ if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
- EVT IntPtr =
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -94,7 +91,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
0)
.setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI);
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
@@ -144,8 +141,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
BytesLeft = SizeVal % UBytes;
}
- Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
- InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
+ InFlag);
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
@@ -172,9 +169,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl,
CVT));
- Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
- X86::ECX,
- Left, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
+ Left, InFlag);
InFlag = Chain.getValue(1);
Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
@@ -249,17 +245,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
unsigned BytesLeft = SizeVal % UBytes;
SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Dst, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI :
- X86::ESI,
- Src, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI,
+ Src, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
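
For reference, the constant-size path in EmitTargetCodeForMemset/Memcpy above splits the byte count into a wide "rep stos"/"rep movs" part, whose element count is loaded into RCX/ECX, plus a small byte tail handled separately. A minimal arithmetic sketch of that split; the names below are illustrative, not LLVM's:

#include <cstdint>

struct RepPlan {
  uint64_t WideCount; // value placed in RCX/ECX for rep stos{q,d,w,b}
  uint64_t TailBytes; // remainder covered by a follow-up narrow copy or set
};

RepPlan planConstantSize(uint64_t SizeVal, unsigned ElementBytes /* 1,2,4,8 */) {
  return {SizeVal / ElementBytes, SizeVal % ElementBytes};
}
// e.g. planConstantSize(1003, 4) yields {250, 3}: 250 dword stores plus 3 tail bytes.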
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index dff3624..8ef08c9 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -44,9 +44,8 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
cl::desc("Enable early if-conversion on X86"));
-/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
-/// current subtarget according to how we should reference it in a non-pcrel
-/// context.
+/// Classify a blockaddress reference for the current subtarget according to how
+/// we should reference it in a non-pcrel context.
unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
if (isPICStyleGOT()) // 32-bit ELF targets.
return X86II::MO_GOTOFF;
@@ -58,9 +57,8 @@ unsigned char X86Subtarget::ClassifyBlockAddressReference() const {
return X86II::MO_NO_FLAG;
}
-/// ClassifyGlobalReference - Classify a global variable reference for the
-/// current subtarget according to how we should reference it in a non-pcrel
-/// context.
+/// Classify a global variable reference for the current subtarget according to
+/// how we should reference it in a non-pcrel context.
unsigned char X86Subtarget::
ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// DLLImport only exists on windows, it is implemented as a load from a
@@ -147,9 +145,9 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
}
-/// getBZeroEntry - This function returns the name of a function which has an
-/// interface like the non-standard bzero function, if such a function exists on
-/// the current subtarget and it is considered prefereable over memset with zero
+/// This function returns the name of a function which has an interface like
+/// the non-standard bzero function, if such a function exists on the
+/// current subtarget and it is considered preferable over memset with zero
/// passed as the second argument. Otherwise it returns null.
const char *X86Subtarget::getBZeroEntry() const {
// Darwin 10 has a __bzero entry point for this purpose.
@@ -166,8 +164,7 @@ bool X86Subtarget::hasSinCos() const {
is64Bit();
}
-/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
-/// to immediate address.
+/// Return true if the subtarget allows calls to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
// FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32
// but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does,
@@ -192,9 +189,25 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit,+sse2";
}
+ // LAHF/SAHF are always supported in non-64-bit mode.
+ if (!In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+sahf," + FullFS;
+ else
+ FullFS = "+sahf";
+ }
+
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
+ // All CPUs that implement SSE4.2 or SSE4A support reasonably fast unaligned
+ // accesses of 16 bytes and under. These features were introduced with
+ // Intel's Nehalem/Silvermont and AMD's Family10h micro-architectures,
+ // respectively.
+ if (hasSSE42() || hasSSE4A())
+ IsUAMem16Slow = false;
+
InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
@@ -224,13 +237,18 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
}
void X86Subtarget::initializeEnvironment() {
- X86SSELevel = NoMMXSSE;
+ X86SSELevel = NoSSE;
X863DNowLevel = NoThreeDNow;
HasCMov = false;
HasX86_64 = false;
HasPOPCNT = false;
HasSSE4A = false;
HasAES = false;
+ HasFXSR = false;
+ HasXSAVE = false;
+ HasXSAVEOPT = false;
+ HasXSAVEC = false;
+ HasXSAVES = false;
HasPCLMUL = false;
HasFMA = false;
HasFMA4 = false;
@@ -252,13 +270,15 @@ void X86Subtarget::initializeEnvironment() {
HasBWI = false;
HasVLX = false;
HasADX = false;
+ HasPKU = false;
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
+ HasLAHFSAHF = false;
HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
- IsUAMemFast = false;
+ IsUAMem16Slow = false;
IsUAMem32Slow = false;
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index f026d42..13d1026 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -47,11 +47,11 @@ class X86Subtarget final : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
+ NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
};
enum X863DNowEnum {
- NoThreeDNow, ThreeDNow, ThreeDNowA
+ NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
};
enum X86ProcFamilyEnum {
@@ -64,10 +64,10 @@ protected:
/// Which PIC style to use
PICStyles::Style PICStyle;
- /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
+ /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
X86SSEEnum X86SSELevel;
- /// 3DNow, 3DNow Athlon, or none supported.
+ /// MMX, 3DNow, 3DNow Athlon, or none supported.
X863DNowEnum X863DNowLevel;
/// True if this processor has conditional move instructions
@@ -86,6 +86,18 @@ protected:
/// Target has AES instructions
bool HasAES;
+ /// Target has FXSAVE/FXRESTOR instructions
+ bool HasFXSR;
+
+ /// Target has XSAVE instructions
+ bool HasXSAVE;
+ /// Target has XSAVEOPT instructions
+ bool HasXSAVEOPT;
+ /// Target has XSAVEC instructions
+ bool HasXSAVEC;
+ /// Target has XSAVES instructions
+ bool HasXSAVES;
+
/// Target has carry-less multiplication
bool HasPCLMUL;
@@ -140,16 +152,19 @@ protected:
/// Processor has RDSEED instructions.
bool HasRDSEED;
+ /// Processor has LAHF/SAHF instructions.
+ bool HasLAHFSAHF;
+
/// True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
- /// True if unaligned memory access is fast.
- bool IsUAMemFast;
+ /// True if unaligned memory accesses of 16-bytes are slow.
+ bool IsUAMem16Slow;
- /// True if unaligned 32-byte memory accesses are slow.
+ /// True if unaligned memory accesses of 32-bytes are slow.
bool IsUAMem32Slow;
/// True if SSE operations can have unaligned memory operands.
@@ -208,6 +223,9 @@ protected:
/// Processor has AVX-512 Vector Length eXtensions
bool HasVLX;
+ /// Processor has PKU extensions
+ bool HasPKU;
+
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
@@ -319,7 +337,6 @@ public:
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
bool hasCMov() const { return HasCMov; }
- bool hasMMX() const { return X86SSELevel >= MMX; }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
@@ -332,14 +349,22 @@ public:
bool hasFp256() const { return hasAVX(); }
bool hasInt256() const { return hasAVX2(); }
bool hasSSE4A() const { return HasSSE4A; }
+ bool hasMMX() const { return X863DNowLevel >= MMX; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAES() const { return HasAES; }
+ bool hasFXSR() const { return HasFXSR; }
+ bool hasXSAVE() const { return HasXSAVE; }
+ bool hasXSAVEOPT() const { return HasXSAVEOPT; }
+ bool hasXSAVEC() const { return HasXSAVEC; }
+ bool hasXSAVES() const { return HasXSAVES; }
bool hasPCLMUL() const { return HasPCLMUL; }
- bool hasFMA() const { return HasFMA; }
- // FIXME: Favor FMA when both are enabled. Is this the right thing to do?
- bool hasFMA4() const { return HasFMA4 && !HasFMA; }
+ // Prefer FMA4 to FMA - it's better for commutation/memory folding and
+ // has equal or better performance on all supported targets.
+ bool hasFMA() const { return HasFMA && !HasFMA4; }
+ bool hasFMA4() const { return HasFMA4; }
+ bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
bool hasXOP() const { return HasXOP; }
bool hasTBM() const { return HasTBM; }
bool hasMOVBE() const { return HasMOVBE; }
@@ -355,9 +380,10 @@ public:
bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
+ bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
- bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
+ bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
@@ -375,6 +401,7 @@ public:
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
+ bool hasPKU() const { return HasPKU; }
bool hasMPX() const { return HasMPX; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
@@ -394,9 +421,11 @@ public:
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
+ bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
bool isTargetWindowsMSVC() const {
return TargetTriple.isWindowsMSVCEnvironment();
@@ -406,6 +435,10 @@ public:
return TargetTriple.isKnownWindowsMSVCEnvironment();
}
+ bool isTargetWindowsCoreCLR() const {
+ return TargetTriple.isWindowsCoreCLREnvironment();
+ }
+
bool isTargetWindowsCygwin() const {
return TargetTriple.isWindowsCygwinEnvironment();
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index fb9cb4b..0e7e4c0 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -28,10 +28,17 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
+namespace llvm {
+void initializeWinEHStatePassPass(PassRegistry &);
+}
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86TargetMachine> Y(TheX86_64Target);
+
+ PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeWinEHStatePassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -45,7 +52,7 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSBinFormatELF())
return make_unique<X86ELFTargetObjectFile>();
- if (TT.isKnownWindowsMSVCEnvironment())
+ if (TT.isKnownWindowsMSVCEnvironment() || TT.isWindowsCoreCLREnvironment())
return make_unique<X86WindowsTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
return make_unique<TargetLoweringObjectFileCOFF>();
@@ -175,8 +182,9 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
//===----------------------------------------------------------------------===//
TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis(
- [this](Function &F) { return TargetTransformInfo(X86TTIImpl(this, F)); });
+ return TargetIRAnalysis([this](const Function &F) {
+ return TargetTransformInfo(X86TTIImpl(this, F));
+ });
}
@@ -246,6 +254,9 @@ bool X86PassConfig::addPreISel() {
}
void X86PassConfig::addPreRegAlloc() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createX86OptimizeLEAs());
+
addPass(createX86CallFrameOptimization());
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index 6f900ea..782768d 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Target/TargetLowering.h"
@@ -152,9 +153,8 @@ static std::string scalarConstantToHexString(const Constant *C) {
}
}
-MCSection *
-X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *X86WindowsTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isMergeableConst() && C) {
const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
@@ -171,5 +171,5 @@ X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind,
COFF::IMAGE_COMDAT_SELECT_ANY);
}
- return TargetLoweringObjectFile::getSectionForConstant(Kind, C);
+ return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C);
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
index 66366b2..6b2448c 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -58,7 +58,7 @@ namespace llvm {
/// \brief Given a mergeable constant with the specified size and relocation
/// information, return a section that it should be placed in.
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
};
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7df7260..2e7bbb2 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
+
using namespace llvm;
#define DEBUG_TYPE "x86tti"
@@ -62,8 +63,8 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
if (ST->is64Bit())
return 64;
- return 32;
+ return 32;
}
unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
@@ -84,12 +85,12 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
-unsigned X86TTIImpl::getArithmeticInstrCost(
+int X86TTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -101,10 +102,9 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// normally expanded to the sequence SRA + SRL + ADD + SRA.
// The OperandValue properties may not be the same as those of the previous
// operation; conservatively assume OP_None.
- unsigned Cost =
- 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
+ int Cost = 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
TargetTransformInfo::OP_None,
TargetTransformInfo::OP_None);
@@ -115,8 +115,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
return Cost;
}
- static const CostTblEntry<MVT::SimpleValueType>
- AVX2UniformConstCostTable[] = {
+ static const CostTblEntry AVX2UniformConstCostTable[] = {
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
@@ -127,12 +126,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
ST->hasAVX2()) {
- int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * AVX2UniformConstCostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
}
- static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
+ static const CostTblEntry AVX512CostTable[] = {
{ ISD::SHL, MVT::v16i32, 1 },
{ ISD::SRL, MVT::v16i32, 1 },
{ ISD::SRA, MVT::v16i32, 1 },
@@ -141,7 +140,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v8i64, 1 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
+ if (ST->hasAVX512()) {
+ if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
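
All of the cost tables in this function follow the same pattern: getTypeLegalizationCost returns LT, where LT.second is the legal MVT and LT.first is roughly how many operations of that legal type the original vector op splits into, so a table hit contributes LT.first * Entry->Cost and a miss falls through to the next, more generic table. A simplified stand-in for CostTblEntry/CostTableLookup (not LLVM's API) to show the shape:

// Toy lookup over (ISD opcode, legal value type) pairs.
struct ToyCostEntry { int ISD; int VT; int Cost; };

template <unsigned N>
const ToyCostEntry *toyLookup(const ToyCostEntry (&Table)[N], int ISD, int VT) {
  for (const ToyCostEntry &E : Table)
    if (E.ISD == ISD && E.VT == VT)
      return &E;
  return nullptr; // caller falls through to a more generic table or BaseT
}

// Usage mirroring the code above:
//   if (const auto *Entry = toyLookup(SomeTable, ISD, LT.second))
//     return LT.first * Entry->Cost;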
+
+ static const CostTblEntry AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 are legal even though we declare them as
// custom so we can detect the cases where the shift amount is a scalar.
{ ISD::SHL, MVT::v4i32, 1 },
@@ -154,7 +158,57 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRL, MVT::v2i64, 1 },
{ ISD::SHL, MVT::v4i64, 1 },
{ ISD::SRL, MVT::v4i64, 1 },
+ };
+
+ // Look for AVX2 lowering tricks.
+ if (ST->hasAVX2()) {
+ if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
+ // On AVX2, a packed v16i16 shift left by a constant build_vector
+ // is lowered into a vector multiply (vpmullw).
+ return LT.first;
+
+ if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry XOPCostTable[] = {
+ // 128bit shifts take 1cy, but right shifts require negation beforehand.
+ { ISD::SHL, MVT::v16i8, 1 },
+ { ISD::SRL, MVT::v16i8, 2 },
+ { ISD::SRA, MVT::v16i8, 2 },
+ { ISD::SHL, MVT::v8i16, 1 },
+ { ISD::SRL, MVT::v8i16, 2 },
+ { ISD::SRA, MVT::v8i16, 2 },
+ { ISD::SHL, MVT::v4i32, 1 },
+ { ISD::SRL, MVT::v4i32, 2 },
+ { ISD::SRA, MVT::v4i32, 2 },
+ { ISD::SHL, MVT::v2i64, 1 },
+ { ISD::SRL, MVT::v2i64, 2 },
+ { ISD::SRA, MVT::v2i64, 2 },
+ // 256bit shifts require splitting if AVX2 didn't catch them above.
+ { ISD::SHL, MVT::v32i8, 2 },
+ { ISD::SRL, MVT::v32i8, 4 },
+ { ISD::SRA, MVT::v32i8, 4 },
+ { ISD::SHL, MVT::v16i16, 2 },
+ { ISD::SRL, MVT::v16i16, 4 },
+ { ISD::SRA, MVT::v16i16, 4 },
+ { ISD::SHL, MVT::v8i32, 2 },
+ { ISD::SRL, MVT::v8i32, 4 },
+ { ISD::SRA, MVT::v8i32, 4 },
+ { ISD::SHL, MVT::v4i64, 2 },
+ { ISD::SRL, MVT::v4i64, 4 },
+ { ISD::SRA, MVT::v4i64, 4 },
+ };
+ // Look for XOP lowering tricks.
+ if (ST->hasXOP()) {
+ if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX2CustomCostTable[] = {
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
@@ -163,7 +217,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
- { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
+ { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v32i8, 32*20 },
@@ -176,44 +231,44 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::UDIV, MVT::v4i64, 4*20 },
};
- if (ST->hasAVX512()) {
- int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * AVX512CostTable[Idx].Cost;
- }
- // Look for AVX2 lowering tricks.
+ // Look for AVX2 lowering tricks for custom cases.
if (ST->hasAVX2()) {
- if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
- (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
- Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
- // On AVX2, a packed v16i16 shift left by a constant build_vector
- // is lowered into a vector multiply (vpmullw).
- return LT.first;
-
- int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * AVX2CostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
}
- static const CostTblEntry<MVT::SimpleValueType>
+ static const CostTblEntry
SSE2UniformConstCostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// Constant splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
+ { ISD::SHL, MVT::v32i8, 2 }, // psllw.
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
+ { ISD::SHL, MVT::v16i16, 2 }, // psllw.
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
+ { ISD::SHL, MVT::v8i32, 2 }, // pslld
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
+ { ISD::SHL, MVT::v4i64, 2 }, // psllq.
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
+ { ISD::SRL, MVT::v32i8, 2 }, // psrlw.
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
+ { ISD::SRL, MVT::v16i16, 2 }, // psrlw.
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
+ { ISD::SRL, MVT::v8i32, 2 }, // psrld.
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
+ { ISD::SRL, MVT::v4i64, 2 }, // psrlq.
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
+ { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
+ { ISD::SRA, MVT::v16i16, 2 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ { ISD::SRA, MVT::v8i32, 2 }, // psrad.
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
+ { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
@@ -227,27 +282,34 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 15;
- int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * SSE2UniformConstCostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
}
if (ISD == ISD::SHL &&
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
- EVT VT = LT.second;
+ MVT VT = LT.second;
+ // Vector shift left by non uniform constant can be lowered
+ // into vector multiply (pmullw/pmulld).
if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
(VT == MVT::v4i32 && ST->hasSSE41()))
- // Vector shift left by non uniform constant can be lowered
- // into vector multiply (pmullw/pmulld).
return LT.first;
+
+ // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
+ // sequence of extract + two vector multiply + insert.
+ if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
+ (ST->hasAVX() && !ST->hasAVX2()))
+ ISD = ISD::MUL;
+
+ // A vector shift left by non uniform constant is converted
+ // into a vector multiply; the new multiply is eventually
+ // lowered into a sequence of shuffles and 2 x pmuludq.
if (VT == MVT::v4i32 && ST->hasSSE2())
- // A vector shift left by non uniform constant is converted
- // into a vector multiply; the new multiply is eventually
- // lowered into a sequence of shuffles and 2 x pmuludq.
ISD = ISD::MUL;
}
- static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
+ static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
// For some cases, where the shift amount is a scalar we would be able
@@ -257,20 +319,31 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// used for vectorization and we don't want to make vectorized code worse
// than scalar code.
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
- { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
- { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul.
+ { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
+ { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
+ { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend.
+ { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence.
+ { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
+ { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
+ { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence.
{ ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
- { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
+ { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend.
+ { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
+ { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
// It is not a good idea to vectorize division. We have to scalarize it and
// in the process we will often end up having to spill regular
@@ -289,12 +362,11 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
};
if (ST->hasSSE2()) {
- int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
- if (Idx != -1)
- return LT.first * SSE2CostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
+ static const CostTblEntry AVX1CostTable[] = {
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
@@ -314,29 +386,21 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
// Look for AVX1 lowering tricks.
if (ST->hasAVX() && !ST->hasAVX2()) {
- EVT VT = LT.second;
+ MVT VT = LT.second;
- // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
- // sequence of extract + two vector multiply + insert.
- if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
- Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
- ISD = ISD::MUL;
-
- int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
- if (Idx != -1)
- return LT.first * AVX1CostTable[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT))
+ return LT.first * Entry->Cost;
}
// Custom lowering of vectors.
- static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
+ static const CostTblEntry CustomLowered[] = {
// A v2i64/v4i64 and multiply is custom lowered as a series of long
// multiplies(3), shifts(4) and adds(2).
{ ISD::MUL, MVT::v2i64, 9 },
{ ISD::MUL, MVT::v4i64, 9 },
};
- int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
- if (Idx != -1)
- return LT.first * CustomLowered[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
+ return LT.first * Entry->Cost;
// Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
// 2x pmuludq, 2x shuffle.
@@ -348,15 +412,15 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
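// Editor's sketch (not part of the patch): every cost-table hit above is
// scaled by the type-legalization split factor (LT.first), i.e. the number
// of legal-width operations the original vector type expands into.
static int legalizedCost(int SplitFactor, int PerOpCost) {
  return SplitFactor * PerOpCost; // e.g. v8i32 on SSE2: 2 x v4i32 ops
}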
-unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) {
+int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
// We only estimate the cost of reverse and alternate shuffles.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- unsigned Cost = 1;
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ int Cost = 1;
if (LT.second.getSizeInBits() > 128)
Cost = 3; // Extract + insert + copy.
@@ -367,14 +431,14 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
if (ST->hasAVX2() && LT.second == MVT::v16i16)
return LT.first;
- static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
+ static const CostTblEntry AVXAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
{ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
@@ -390,13 +454,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
};
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * AVXAltShuffleTbl[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
+ static const CostTblEntry SSE41AltShuffleTbl[] = {
// These are lowered into movsd.
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
@@ -414,13 +477,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
};
- if (ST->hasSSE41()) {
- int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * SSE41AltShuffleTbl[Idx].Cost;
- }
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
+ LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
+ static const CostTblEntry SSSE3AltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
@@ -433,13 +495,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
};
- if (ST->hasSSSE3()) {
- int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
- }
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
- static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
+ static const CostTblEntry SSEAltShuffleTbl[] = {
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
@@ -454,65 +515,47 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
};
// Fall-back (SSE3 and SSE2).
- int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx != -1)
- return LT.first * SSEAltShuffleTbl[Idx].Cost;
+ if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
- std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
-
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- SSE2ConvTbl[] = {
- // These are somewhat magic numbers justified by looking at the output of
- // Intel's IACA, running some kernels and making sure when we take
- // legalization into account the throughput will be overestimated.
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
- // There are faster sequences for float conversions.
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
- { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+  // FIXME: Need a better design of the cost table to handle non-simple types
+  // and the potentially massive number of combinations
+  // (elem_num x src_type x dst_type).
+
+ static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
+
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 },
};
- if (ST->hasSSE2() && !ST->hasAVX()) {
- int Idx =
- ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
- if (Idx != -1)
- return LTSrc.first * SSE2ConvTbl[Idx].Cost;
- }
-
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- AVX512ConversionTbl[] = {
+ static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
{ ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 },
- { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 },
{ ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 },
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 },
- { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 },
// v16i1 -> v16i32 - load + broadcast
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
@@ -522,33 +565,49 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
- { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
- };
- if (ST->hasAVX512()) {
- int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
- LTSrc.second);
- if (Idx != -1)
- return AVX512ConversionTbl[Idx].Cost;
- }
- EVT SrcTy = TLI->getValueType(DL, Src);
- EVT DstTy = TLI->getValueType(DL, Dst);
-
- // The function getSimpleVT only handles simple value types.
- if (!SrcTy.isSimple() || !DstTy.isSimple())
- return BaseT::getCastInstrCost(Opcode, Dst, Src);
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
+
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
+ };
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- AVX2ConversionTbl[] = {
+ static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
@@ -579,8 +638,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
};
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- AVXConversionTbl[] = {
+ static const TypeConversionCostTblEntry AVXConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
@@ -650,34 +708,158 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
};
+ static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 },
+
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 30 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 },
+ };
+
+ static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
+    // These are somewhat magic numbers justified by looking at the output of
+    // Intel's IACA, running some kernels and making sure that, when we take
+    // legalization into account, the throughput will be overestimated.
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ // There are faster sequences for float conversions.
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
+
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
+ };
+
+ std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
+
+ if (ST->hasSSE2() && !ST->hasAVX()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
+ LTDest.second, LTSrc.second))
+ return LTSrc.first * Entry->Cost;
+ }
+
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
+
+ // The function getSimpleVT only handles simple value types.
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+ if (ST->hasDQI())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+
+ if (ST->hasAVX512())
+ if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+
if (ST->hasAVX2()) {
- int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
- DstTy.getSimpleVT(), SrcTy.getSimpleVT());
- if (Idx != -1)
- return AVX2ConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
if (ST->hasAVX()) {
- int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
- if (Idx != -1)
- return AVXConversionTbl[Idx].Cost;
+ if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ if (ST->hasSSE41()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ if (ST->hasSSE2()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
}
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
+ static const CostTblEntry SSE42CostTbl[] = {
{ ISD::SETCC, MVT::v2f64, 1 },
{ ISD::SETCC, MVT::v4f32, 1 },
{ ISD::SETCC, MVT::v2i64, 1 },
@@ -686,7 +868,7 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v16i8, 1 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
+ static const CostTblEntry AVX1CostTbl[] = {
{ ISD::SETCC, MVT::v4f64, 1 },
{ ISD::SETCC, MVT::v8f32, 1 },
// AVX1 does not support 8-wide integer compare.
@@ -696,54 +878,45 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SETCC, MVT::v32i8, 4 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
+ static const CostTblEntry AVX2CostTbl[] = {
{ ISD::SETCC, MVT::v4i64, 1 },
{ ISD::SETCC, MVT::v8i32, 1 },
{ ISD::SETCC, MVT::v16i16, 1 },
{ ISD::SETCC, MVT::v32i8, 1 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
+ static const CostTblEntry AVX512CostTbl[] = {
{ ISD::SETCC, MVT::v8i64, 1 },
{ ISD::SETCC, MVT::v16i32, 1 },
{ ISD::SETCC, MVT::v8f64, 1 },
{ ISD::SETCC, MVT::v16f32, 1 },
};
- if (ST->hasAVX512()) {
- int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX512CostTbl[Idx].Cost;
- }
+ if (ST->hasAVX512())
+ if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX2()) {
- int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX2CostTbl[Idx].Cost;
- }
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTbl[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42()) {
- int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTbl[Idx].Cost;
- }
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) {
+int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
assert(Val->isVectorTy() && "This must be a vector type");
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -761,10 +934,9 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return BaseT::getVectorInstrCost(Opcode, Val, Index);
}
-unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) {
+int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
assert (Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
+ int Cost = 0;
for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
if (Insert)
@@ -776,9 +948,8 @@ unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
return Cost;
}
-unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) {
+int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
@@ -796,22 +967,21 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// Assume that all other non-power-of-two numbers are scalarized.
if (!isPowerOf2_32(NumElem)) {
- unsigned Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(),
- Alignment, AddressSpace);
- unsigned SplitCost = getScalarizationOverhead(Src,
- Opcode == Instruction::Load,
- Opcode==Instruction::Store);
+ int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment,
+ AddressSpace);
+ int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load,
+ Opcode == Instruction::Store);
return NumElem * Cost + SplitCost;
}
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
// Each load/store unit costs 1.
- unsigned Cost = LT.first * 1;
+ int Cost = LT.first * 1;
// On Sandybridge 256bit load/stores are double pumped
// (but not on Haswell).
@@ -821,9 +991,9 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
return Cost;
}
-unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
- unsigned Alignment,
- unsigned AddressSpace) {
+int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
+ unsigned Alignment,
+ unsigned AddressSpace) {
VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
if (!SrcVTy)
// To calculate scalar take the regular cost, without mask
@@ -832,34 +1002,33 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
unsigned NumElem = SrcVTy->getVectorNumElements();
VectorType *MaskTy =
VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem);
- if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) ||
- (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) ||
+ if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) ||
+ (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) ||
!isPowerOf2_32(NumElem)) {
// Scalarization
- unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
- unsigned ScalarCompareCost =
- getCmpSelInstrCost(Instruction::ICmp,
- Type::getInt8Ty(getGlobalContext()), NULL);
- unsigned BranchCost = getCFInstrCost(Instruction::Br);
- unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
-
- unsigned ValueSplitCost =
- getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
- Opcode == Instruction::Store);
- unsigned MemopCost =
+ int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
+ int ScalarCompareCost = getCmpSelInstrCost(
+ Instruction::ICmp, Type::getInt8Ty(getGlobalContext()), nullptr);
+ int BranchCost = getCFInstrCost(Instruction::Br);
+ int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
+
+ int ValueSplitCost = getScalarizationOverhead(
+ SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store);
+ int MemopCost =
NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
- unsigned Cost = 0;
- if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() &&
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
+ auto VT = TLI->getValueType(DL, SrcVTy);
+ int Cost = 0;
+ if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
- Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
- getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0);
+ Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) +
+ getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr);
else if (LT.second.getVectorNumElements() > NumElem) {
VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
@@ -874,7 +1043,7 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
return Cost+LT.first;
}
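// Editor's sketch (not part of the patch): the scalarization branch above
// sums four component costs. Restated as plain arithmetic over inputs the
// caller would supply (all parameter names here are stand-ins):
static int maskedScalarizationCost(int NumElem, int MaskSplitCost,
                                   int BranchCost, int ScalarCompareCost,
                                   int ValueSplitCost, int ScalarMemOpCost) {
  int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
  int MemopCost = NumElem * ScalarMemOpCost;
  return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
}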
-unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
+int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -887,10 +1056,10 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
return BaseT::getAddressComputationCost(Ty, IsComplex);
}
-unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
- bool IsPairwise) {
+int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
+ bool IsPairwise) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@@ -900,7 +1069,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
// and make it as the cost.
- static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
+ static const CostTblEntry SSE42CostTblPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
@@ -908,7 +1077,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v8i16, 5 },
};
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
+ static const CostTblEntry AVX1CostTblPairWise[] = {
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::FADD, MVT::v4f64, 5 },
{ ISD::FADD, MVT::v8f32, 7 },
@@ -919,7 +1088,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v8i32, 5 },
};
- static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
+ static const CostTblEntry SSE42CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
@@ -927,7 +1096,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
};
- static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
+ static const CostTblEntry AVX1CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v4f64, 3 },
{ ISD::FADD, MVT::v8f32, 4 },
@@ -939,29 +1108,21 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
};
if (IsPairwise) {
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTblPairWise[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42()) {
- int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTblPairWise[Idx].Cost;
- }
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
} else {
- if (ST->hasAVX()) {
- int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
- }
+ if (ST->hasAVX())
+ if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
- if (ST->hasSSE42()) {
- int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
- if (Idx != -1)
- return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
- }
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
+ return LT.first * Entry->Cost;
}
return BaseT::getReductionCost(Opcode, ValTy, IsPairwise);
@@ -970,7 +1131,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
-unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
+int X86TTIImpl::getIntImmCost(int64_t Val) {
if (Val == 0)
return TTI::TCC_Free;
@@ -980,7 +1141,7 @@ unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
return 2 * TTI::TCC_Basic;
}
-unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1004,18 +1165,18 @@ unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Split the constant into 64-bit chunks and calculate the cost for each
// chunk.
- unsigned Cost = 0;
+ int Cost = 0;
for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
int64_t Val = Tmp.getSExtValue();
Cost += getIntImmCost(Val);
}
   // We need at least one instruction to materialize the constant.
- return std::max(1U, Cost);
+ return std::max(1, Cost);
}
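// Editor's sketch (not part of the patch): the chunked costing above for a
// hypothetical 128-bit immediate, using the same per-chunk model as
// getIntImmCost(int64_t): 0 for a zero chunk, 1 if the chunk is a signed
// 32-bit immediate, 2 otherwise.
#include <algorithm>
#include <cstdint>
static int chunkCostSketch(int64_t Val) {
  if (Val == 0)
    return 0;
  return (Val >= INT32_MIN && Val <= INT32_MAX) ? 1 : 2;
}
static int imm128CostSketch(int64_t Lo, int64_t Hi) {
  int Cost = chunkCostSketch(Lo) + chunkCostSketch(Hi);
  return std::max(1, Cost); // at least one instruction to materialize
}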
-unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1038,6 +1199,26 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
case Instruction::Store:
ImmIdx = 0;
break;
+ case Instruction::ICmp:
+    // This is an imperfect hack to prevent constant hoisting of
+    // compares that might be trying to check if a 64-bit value fits in
+    // 32 bits. The backend can optimize these cases using a right shift by 32.
+    // Ideally we would check the compare predicate here. There are also other
+    // similar immediates the backend can use shifts for.
+ if (Idx == 1 && Imm.getBitWidth() == 64) {
+ uint64_t ImmVal = Imm.getZExtValue();
+ if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
+ return TTI::TCC_Free;
+ }
+ ImmIdx = 1;
+ break;
+ case Instruction::And:
+    // We support 64-bit ANDs with immediates that have 32 bits of leading
+    // zeroes by using a 32-bit operation with implicit zero extension. Detect
+    // such immediates here, as the normal path expects bit 31 to be sign extended.
+ if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Fallthrough
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1045,10 +1226,8 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
- case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- case Instruction::ICmp:
ImmIdx = 1;
break;
// Always return TCC_Free for the shift value of a shift instruction.
@@ -1073,18 +1252,18 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
}
if (Idx == ImmIdx) {
- unsigned NumConstants = (BitSize + 63) / 64;
- unsigned Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
+ int NumConstants = (BitSize + 63) / 64;
+ int Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
return (Cost <= NumConstants * TTI::TCC_Basic)
- ? static_cast<unsigned>(TTI::TCC_Free)
+ ? static_cast<int>(TTI::TCC_Free)
: Cost;
}
return X86TTIImpl::getIntImmCost(Imm, Ty);
}
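// Editor's sketch (not part of the patch): the two "free immediate" patterns
// introduced above, checked standalone. Compares against 0x100000000 or
// 0xffffffff can be folded into a right shift by 32, and an AND whose upper
// 32 bits are zero can use a 32-bit opcode with implicit zero extension.
#include <cstdint>
static bool isFreeICmpImmSketch(uint64_t ImmVal) {
  return ImmVal == 0x100000000ULL || ImmVal == 0xffffffffULL;
}
static bool isFreeAndImmSketch(uint64_t ImmVal) {
  return (ImmVal >> 32) == 0; // same predicate as isUInt<32>(ImmVal)
}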
-unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
- const APInt &Imm, Type *Ty) {
+int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
@@ -1118,23 +1297,181 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
return X86TTIImpl::getIntImmCost(Imm, Ty);
}
-bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
- int DataWidth = DataTy->getPrimitiveSizeInBits();
+// Return an average cost of a Gather / Scatter instruction; maybe improved later.
+int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
+ unsigned Alignment, unsigned AddressSpace) {
+
+ assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost");
+ unsigned VF = SrcVTy->getVectorNumElements();
+
+  // Try to reduce the index size from 64 bits (the default for GEP) to 32.
+  // This is essential for VF 16. If the index can't be reduced to 32 bits, the
+  // operation will use 16 x 64-bit indices, which do not fit in a zmm register,
+  // so the operation has to be split. Also check that the base pointer is the
+  // same for all lanes, and that there's at most one variable index.
+ auto getIndexSizeInBits = [](Value *Ptr, const DataLayout& DL) {
+ unsigned IndexSize = DL.getPointerSizeInBits();
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (IndexSize < 64 || !GEP)
+ return IndexSize;
+
+ unsigned NumOfVarIndices = 0;
+ Value *Ptrs = GEP->getPointerOperand();
+ if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs))
+ return IndexSize;
+ for (unsigned i = 1; i < GEP->getNumOperands(); ++i) {
+ if (isa<Constant>(GEP->getOperand(i)))
+ continue;
+ Type *IndxTy = GEP->getOperand(i)->getType();
+ if (IndxTy->isVectorTy())
+ IndxTy = IndxTy->getVectorElementType();
+ if ((IndxTy->getPrimitiveSizeInBits() == 64 &&
+ !isa<SExtInst>(GEP->getOperand(i))) ||
+ ++NumOfVarIndices > 1)
+ return IndexSize; // 64
+ }
+ return (unsigned)32;
+ };
+
+
+ // Trying to reduce IndexSize to 32 bits for vector 16.
+ // By default the IndexSize is equal to pointer size.
+ unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) :
+ DL.getPointerSizeInBits();
+
+ Type *IndexVTy = VectorType::get(IntegerType::get(getGlobalContext(),
+ IndexSize), VF);
+ std::pair<int, MVT> IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy);
+ std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy);
+ int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
+ if (SplitFactor > 1) {
+ // Handle splitting of vector of pointers
+ Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor);
+ return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment,
+ AddressSpace);
+ }
+
+  // The gather / scatter cost is given by Intel architects. It is a rough
+  // number since we are looking at one instruction at a time.
+ const int GSOverhead = 2;
+ return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+ Alignment, AddressSpace);
+}
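// Editor's sketch (not part of the patch): the vector gather/scatter cost
// above, restated with plain arithmetic. SplitFactor and ScalarMemCost stand
// in for the legalization split factor and per-element memory-op cost
// computed by TTI.
static int gsVectorCostSketch(int VF, int SplitFactor, int ScalarMemCost) {
  const int GSOverhead = 2;            // rough per-instruction overhead
  if (SplitFactor > 1)                 // vector is split: cost scales with it
    return SplitFactor *
           gsVectorCostSketch(VF / SplitFactor, 1, ScalarMemCost);
  return GSOverhead + VF * ScalarMemCost;
}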
+
+/// Return the cost of fully scalarizing a gather / scatter operation.
+///
+/// Opcode - Load or Store instruction.
+/// SrcVTy - The type of the data vector that should be gathered or scattered.
+/// VariableMask - The mask is non-constant at compile time.
+/// Alignment - Alignment for one element.
+/// AddressSpace - pointer[s] address space.
+///
+int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
+ bool VariableMask, unsigned Alignment,
+ unsigned AddressSpace) {
+ unsigned VF = SrcVTy->getVectorNumElements();
+
+ int MaskUnpackCost = 0;
+ if (VariableMask) {
+ VectorType *MaskTy =
+ VectorType::get(Type::getInt1Ty(getGlobalContext()), VF);
+ MaskUnpackCost = getScalarizationOverhead(MaskTy, false, true);
+ int ScalarCompareCost =
+ getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(getGlobalContext()),
+ nullptr);
+ int BranchCost = getCFInstrCost(Instruction::Br);
+ MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
+ }
- // Todo: AVX512 allows gather/scatter, works with strided and random as well
- if ((DataWidth < 32) || (Consecutive == 0))
+ // The cost of the scalar loads/stores.
+ int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+ Alignment, AddressSpace);
+
+ int InsertExtractCost = 0;
+ if (Opcode == Instruction::Load)
+ for (unsigned i = 0; i < VF; ++i)
+ // Add the cost of inserting each scalar load into the vector
+ InsertExtractCost +=
+ getVectorInstrCost(Instruction::InsertElement, SrcVTy, i);
+ else
+ for (unsigned i = 0; i < VF; ++i)
+ // Add the cost of extracting each element out of the data vector
+ InsertExtractCost +=
+ getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i);
+
+ return MemoryOpCost + MaskUnpackCost + InsertExtractCost;
+}
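// Editor's sketch (not part of the patch): the fully scalarized cost above is
// mask unpacking (only when the mask is variable) plus VF scalar memory ops
// plus VF vector insert/extract operations. All component costs are inputs
// supplied by the caller.
static int gsScalarCostSketch(unsigned VF, bool VariableMask,
                              int MaskUnpackCost, int BranchPlusCmpCost,
                              int ScalarMemCost, int InsertExtractCost) {
  int MaskCost =
      VariableMask ? MaskUnpackCost + int(VF) * BranchPlusCmpCost : 0;
  return MaskCost + int(VF) * ScalarMemCost + int(VF) * InsertExtractCost;
}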
+
+/// Calculate the cost of a Gather / Scatter operation.
+int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
+ Value *Ptr, bool VariableMask,
+ unsigned Alignment) {
+ assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
+ unsigned VF = SrcVTy->getVectorNumElements();
+ PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ if (!PtrTy && Ptr->getType()->isVectorTy())
+ PtrTy = dyn_cast<PointerType>(Ptr->getType()->getVectorElementType());
+ assert(PtrTy && "Unexpected type for Ptr argument");
+ unsigned AddressSpace = PtrTy->getAddressSpace();
+
+ bool Scalarize = false;
+ if ((Opcode == Instruction::Load && !isLegalMaskedGather(SrcVTy)) ||
+ (Opcode == Instruction::Store && !isLegalMaskedScatter(SrcVTy)))
+ Scalarize = true;
+  // Gather / Scatter with a vector factor of 2 is not profitable on KNL / SKX.
+  // A 4-element gather/scatter instruction does not exist on KNL. We could
+  // extend it to 8 elements, but zeroing the upper bits of the mask vector
+  // will add more instructions. Right now we give the scalar cost of vector-4
+  // for KNL. TODO: check whether the gather/scatter instruction is better in
+  // the VariableMask case.
+ if (VF == 2 || (VF == 4 && !ST->hasVLX()))
+ Scalarize = true;
+
+ if (Scalarize)
+ return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, AddressSpace);
+
+ return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
+}
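// Editor's sketch (not part of the patch): the scalarize-or-not decision
// above as a standalone predicate. LegalMasked stands in for
// isLegalMaskedGather/isLegalMaskedScatter and HasVLX for ST->hasVLX().
static bool shouldScalarizeGatherScatterSketch(unsigned VF, bool LegalMasked,
                                               bool HasVLX) {
  if (!LegalMasked)
    return true;                             // no hardware support at all
  return VF == 2 || (VF == 4 && !HasVLX);    // small vectors: not profitable
}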
+
+bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
+ Type *ScalarTy = DataTy->getScalarType();
+ int DataWidth = isa<PointerType>(ScalarTy) ?
+ DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
+
+ return (DataWidth >= 32 && ST->hasAVX2());
+}
+
+bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
+ return isLegalMaskedLoad(DataType);
+}
+
+bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
+  // This function is currently called in two cases: from the Loop Vectorizer
+  // and from the Scalarizer.
+  // When the Loop Vectorizer asks about the legality of the feature,
+  // the vectorization factor is not calculated yet. The Loop Vectorizer
+  // sends a scalar type and the decision is based on the width of the
+  // scalar element.
+  // Later on, the cost model will estimate the usage of this intrinsic
+  // based on the vector type.
+ // The Scalarizer asks again about legality. It sends a vector type.
+ // In this case we can reject non-power-of-2 vectors.
+ if (isa<VectorType>(DataTy) && !isPowerOf2_32(DataTy->getVectorNumElements()))
return false;
- if (ST->hasAVX512() || ST->hasAVX2())
- return true;
- return false;
+ Type *ScalarTy = DataTy->getScalarType();
+ int DataWidth = isa<PointerType>(ScalarTy) ?
+ DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
+
+ // AVX-512 allows gather and scatter
+ return DataWidth >= 32 && ST->hasAVX512();
}
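// Editor's sketch (not part of the patch): the legality checks above reduce
// to a width test on the scalar element plus a subtarget feature bit (AVX2
// for masked load/store, AVX-512 for gather/scatter). PointerWidth stands in
// for DL.getPointerSizeInBits().
static bool legalMaskedMemSketch(unsigned ElemBits, bool IsPointer,
                                 unsigned PointerWidth, bool HasFeature) {
  unsigned DataWidth = IsPointer ? PointerWidth : ElemBits;
  return DataWidth >= 32 && HasFeature;
}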
-bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
- return isLegalMaskedLoad(DataType, Consecutive);
+bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
+ return isLegalMaskedGather(DataType);
}
-bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const {
+bool X86TTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
// Work this as a subsetting of subtarget features.
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
index da3f36c..adb745e 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -33,13 +33,13 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
const X86Subtarget *ST;
const X86TargetLowering *TLI;
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+ int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
public:
- explicit X86TTIImpl(const X86TargetMachine *TM, Function &F)
+ explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
@@ -62,38 +62,44 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getMaxInterleaveFactor(unsigned VF);
- unsigned getArithmeticInstrCost(
+ int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
- unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp);
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
- unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
-
- unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex);
-
- unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
-
- unsigned getIntImmCost(int64_t);
-
- unsigned getIntImmCost(const APInt &Imm, Type *Ty);
-
- unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
- Type *Ty);
- unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty);
- bool isLegalMaskedLoad(Type *DataType, int Consecutive);
- bool isLegalMaskedStore(Type *DataType, int Consecutive);
- bool hasCompatibleFunctionAttributes(const Function *Caller,
- const Function *Callee) const;
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ bool VariableMask, unsigned Alignment);
+ int getAddressComputationCost(Type *PtrTy, bool IsComplex);
+
+ int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
+
+ int getIntImmCost(int64_t);
+
+ int getIntImmCost(const APInt &Imm, Type *Ty);
+
+ int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+ int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ bool isLegalMaskedLoad(Type *DataType);
+ bool isLegalMaskedStore(Type *DataType);
+ bool isLegalMaskedGather(Type *DataType);
+ bool isLegalMaskedScatter(Type *DataType);
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+private:
+ int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
+ unsigned Alignment, unsigned AddressSpace);
+ int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ unsigned Alignment, unsigned AddressSpace);
/// @}
};
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index 9190d0b..dce94a9 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -15,7 +15,8 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
-#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
@@ -38,12 +39,16 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "winehstate"
+namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); }
+
namespace {
class WinEHStatePass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- WinEHStatePass() : FunctionPass(ID) {}
+ WinEHStatePass() : FunctionPass(ID) {
+ initializeWinEHStatePassPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &Fn) override;
@@ -62,18 +67,13 @@ private:
void linkExceptionRegistration(IRBuilder<> &Builder, Function *Handler);
void unlinkExceptionRegistration(IRBuilder<> &Builder);
- void addCXXStateStores(Function &F, MachineModuleInfo &MMI);
- void addSEHStateStores(Function &F, MachineModuleInfo &MMI);
- void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo,
- Function &F, int BaseState);
+ void addStateStores(Function &F, WinEHFuncInfo &FuncInfo);
void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State);
Value *emitEHLSDA(IRBuilder<> &Builder, Function *F);
Function *generateLSDAInEAXThunk(Function *ParentFunc);
- int escapeRegNode(Function &F);
-
// Module-level type getters.
Type *getEHLinkRegistrationType();
Type *getSEHRegistrationType();
@@ -111,6 +111,9 @@ FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); }
char WinEHStatePass::ID = 0;
+INITIALIZE_PASS(WinEHStatePass, "x86-winehstate",
+ "Insert stores for EH state numbers", false, false)
+
bool WinEHStatePass::doInitialization(Module &M) {
TheModule = &M;
FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape);
@@ -138,14 +141,7 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool WinEHStatePass::runOnFunction(Function &F) {
- // If this is an outlined handler, don't do anything. We'll do state insertion
- // for it in the parent.
- StringRef WinEHParentName =
- F.getFnAttribute("wineh-parent").getValueAsString();
- if (WinEHParentName != F.getName() && !WinEHParentName.empty())
- return false;
-
- // Check the personality. Do nothing if this is not an MSVC personality.
+ // Check the personality. Do nothing if this personality doesn't use funclets.
if (!F.hasPersonalityFn())
return false;
PersonalityFn =
@@ -153,7 +149,19 @@ bool WinEHStatePass::runOnFunction(Function &F) {
if (!PersonalityFn)
return false;
Personality = classifyEHPersonality(PersonalityFn);
- if (!isMSVCEHPersonality(Personality))
+ if (!isFuncletEHPersonality(Personality))
+ return false;
+
+ // Skip this function if there are no EH pads and we aren't using IR-level
+ // outlining.
+ bool HasPads = false;
+ for (BasicBlock &BB : F) {
+ if (BB.isEHPad()) {
+ HasPads = true;
+ break;
+ }
+ }
+ if (!HasPads)
return false;
// Disable frame pointer elimination in this function.
@@ -163,14 +171,13 @@ bool WinEHStatePass::runOnFunction(Function &F) {
emitExceptionRegistrationRecord(&F);
- auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>();
- assert(MMIPtr && "MachineModuleInfo should always be available");
- MachineModuleInfo &MMI = *MMIPtr;
- switch (Personality) {
- default: llvm_unreachable("unexpected personality function");
- case EHPersonality::MSVC_CXX: addCXXStateStores(F, MMI); break;
- case EHPersonality::MSVC_X86SEH: addSEHStateStores(F, MMI); break;
- }
+ // The state numbers calculated here in IR must agree with what we calculate
+ // later on for the MachineFunction. In particular, if an IR pass deletes an
+  // unreachable EH pad after this point but before machine CFG construction, we
+ // will be in trouble. If this assumption is ever broken, we should turn the
+ // numbers into an immutable analysis pass.
+ WinEHFuncInfo FuncInfo;
+ addStateStores(F, FuncInfo);
// Reset per-function state.
PersonalityFn = nullptr;
@@ -261,7 +268,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -1
StateFieldIndex = 2;
- insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1);
+ insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(), -1);
// Handler = __ehhandler$F
Function *Trampoline = generateLSDAInEAXThunk(F);
Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1);
@@ -278,7 +285,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) {
Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0));
// TryLevel = -2 / -1
StateFieldIndex = 4;
- insertStateNumberStore(RegNode, Builder.GetInsertPoint(),
+ insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(),
UseStackGuard ? -2 : -1);
// ScopeTable = llvm.x86.seh.lsda(F)
Value *FI8 = Builder.CreateBitCast(F, Int8PtrType);
@@ -347,7 +354,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
Value *CastPersonality =
Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo());
auto AI = Trampoline->arg_begin();
- Value *Args[5] = {LSDA, AI++, AI++, AI++, AI++};
+ Value *Args[5] = {LSDA, &*AI++, &*AI++, &*AI++, &*AI++};
CallInst *Call = Builder.CreateCall(CastPersonality, Args);
// Can't use musttail due to prototype mismatch, but we can use tail.
Call->setTailCall(true);
@@ -391,160 +398,53 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
Builder.CreateStore(Next, FSZero);
}
-void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
- calculateWinCXXEHStateNumbers(&F, FuncInfo);
-
- // The base state for the parent is -1.
- addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1);
-
- // Set up RegNodeEscapeIndex
- int RegNodeEscapeIndex = escapeRegNode(F);
- FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
-
- // Only insert stores in catch handlers.
- Constant *FI8 =
- ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext()));
- for (auto P : FuncInfo.HandlerBaseState) {
- Function *Handler = const_cast<Function *>(P.first);
- int BaseState = P.second;
- IRBuilder<> Builder(&Handler->getEntryBlock(),
- Handler->getEntryBlock().begin());
- // FIXME: Find and reuse such a call if present.
- Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)});
- Value *RecoveredRegNode = Builder.CreateCall(
- FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)});
- RecoveredRegNode =
- Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0));
- addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState);
- }
-}
-
-/// Escape RegNode so that we can access it from child handlers. Find the call
-/// to localescape, if any, in the entry block and append RegNode to the list
-/// of arguments.
-int WinEHStatePass::escapeRegNode(Function &F) {
- // Find the call to localescape and extract its arguments.
- IntrinsicInst *EscapeCall = nullptr;
- for (Instruction &I : F.getEntryBlock()) {
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::localescape) {
- EscapeCall = II;
- break;
- }
- }
- SmallVector<Value *, 8> Args;
- if (EscapeCall) {
- auto Ops = EscapeCall->arg_operands();
- Args.append(Ops.begin(), Ops.end());
- }
- Args.push_back(RegNode);
-
- // Replace the call (if it exists) with new one. Otherwise, insert at the end
- // of the entry block.
- Instruction *InsertPt = EscapeCall;
- if (!EscapeCall)
- InsertPt = F.getEntryBlock().getTerminator();
- IRBuilder<> Builder(&F.getEntryBlock(), InsertPt);
- Builder.CreateCall(FrameEscape, Args);
- if (EscapeCall)
- EscapeCall->eraseFromParent();
- return Args.size() - 1;
-}
+void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) {
+ // Mark the registration node. The backend needs to know which alloca it is so
+ // that it can recover the original frame pointer.
+ IRBuilder<> Builder(RegNode->getParent(), std::next(RegNode->getIterator()));
+ Value *RegNodeI8 = Builder.CreateBitCast(RegNode, Builder.getInt8PtrTy());
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_ehregnode),
+ {RegNodeI8});
+
+ // Calculate state numbers.
+ if (isAsynchronousEHPersonality(Personality))
+ calculateSEHStateNumbers(&F, FuncInfo);
+ else
+ calculateWinCXXEHStateNumbers(&F, FuncInfo);
-void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode,
- WinEHFuncInfo &FuncInfo,
- Function &F, int BaseState) {
// Iterate all the instructions and emit state number stores.
+ DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F);
for (BasicBlock &BB : F) {
+    // Figure out what state we should assign to calls in this block.
+ int BaseState = -1;
+ auto &BBColors = BlockColors[&BB];
+
+ assert(BBColors.size() == 1 &&
+ "multi-color BB not removed by preparation");
+ BasicBlock *FuncletEntryBB = BBColors.front();
+ if (auto *FuncletPad =
+ dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) {
+ auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+ if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+ BaseState = BaseStateI->second;
+ }
+
for (Instruction &I : BB) {
if (auto *CI = dyn_cast<CallInst>(&I)) {
// Possibly throwing call instructions have no actions to take after
// an unwind. Ensure they are in the -1 state.
if (CI->doesNotThrow())
continue;
- insertStateNumberStore(ParentRegNode, CI, BaseState);
+ insertStateNumberStore(RegNode, CI, BaseState);
} else if (auto *II = dyn_cast<InvokeInst>(&I)) {
// Look up the state number of the landingpad this unwinds to.
- LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
- // FIXME: Why does this assertion fail?
- //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!");
- int State = FuncInfo.LandingPadStateMap[LPI];
- insertStateNumberStore(ParentRegNode, II, State);
- }
- }
- }
-}
-
-/// Assign every distinct landingpad a unique state number for SEH. Unlike C++
-/// EH, we can use this very simple algorithm while C++ EH cannot because catch
-/// handlers aren't outlined and the runtime doesn't have to figure out which
-/// catch handler frame to unwind to.
-/// FIXME: __finally blocks are outlined, so this approach may break down there.
-void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
- WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F);
-
- // Remember and return the index that we used. We save it in WinEHFuncInfo so
- // that we can lower llvm.x86.seh.recoverfp later in filter functions without
- // too much trouble.
- int RegNodeEscapeIndex = escapeRegNode(F);
- FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex;
-
- // Iterate all the instructions and emit state number stores.
- int CurState = 0;
- SmallPtrSet<BasicBlock *, 4> ExceptBlocks;
- for (BasicBlock &BB : F) {
- for (auto I = BB.begin(), E = BB.end(); I != E; ++I) {
- if (auto *CI = dyn_cast<CallInst>(I)) {
- auto *Intrin = dyn_cast<IntrinsicInst>(CI);
- if (Intrin) {
- // Calls that "don't throw" are considered to be able to throw asynch
- // exceptions, but intrinsics cannot.
- continue;
- }
- insertStateNumberStore(RegNode, CI, -1);
- } else if (auto *II = dyn_cast<InvokeInst>(I)) {
- // Look up the state number of the landingpad this unwinds to.
- LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst();
- auto InsertionPair =
- FuncInfo.LandingPadStateMap.insert(std::make_pair(LPI, CurState));
- auto Iter = InsertionPair.first;
- int &State = Iter->second;
- bool Inserted = InsertionPair.second;
- if (Inserted) {
- // Each action consumes a state number.
- auto *EHActions = cast<IntrinsicInst>(LPI->getNextNode());
- SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
- parseEHActions(EHActions, ActionList);
- assert(!ActionList.empty());
- CurState += ActionList.size();
- State += ActionList.size() - 1;
-
- // Remember all the __except block targets.
- for (auto &Handler : ActionList) {
- if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) {
- auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc());
-#ifndef NDEBUG
- for (BasicBlock *Pred : predecessors(BA->getBasicBlock()))
- assert(Pred->isLandingPad() &&
- "WinEHPrepare failed to split block");
-#endif
- ExceptBlocks.insert(BA->getBasicBlock());
- }
- }
- }
+ assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!");
+ int State = FuncInfo.InvokeStateMap[II];
insertStateNumberStore(RegNode, II, State);
}
}
}
-
- // Insert llvm.x86.seh.restoreframe() into each __except block.
- Function *RestoreFrame =
- Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe);
- for (BasicBlock *ExceptBB : ExceptBlocks) {
- IRBuilder<> Builder(ExceptBB->begin());
- Builder.CreateCall(RestoreFrame, {});
- }
}
void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode,
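The WinEHStatePass hunk above replaces the per-funclet store walk with a single pass that colors blocks into funclets and assigns each call site a state: -1 for may-throw calls with no unwind actions, and the unwind destination's state for invokes, while nounwind calls are skipped. A minimal standalone sketch of that numbering idea (plain C++, not the LLVM pass itself; the CallSite struct and the state values are hypothetical):

#include <cstdio>
#include <vector>

// Hypothetical model of a call site: invokes carry the state of the
// landing pad / funclet they unwind to, plain calls do not.
struct CallSite {
  bool IsInvoke;
  int UnwindState;   // only meaningful when IsInvoke is true
  bool DoesNotThrow; // nounwind calls need no state store
};

int main() {
  const std::vector<CallSite> Sites = {
      {false, 0, true},  // nounwind call: skipped entirely
      {false, 0, false}, // may-throw call: recorded as state -1
      {true, 2, false},  // invoke: recorded as its unwind destination's state
  };
  for (size_t I = 0; I < Sites.size(); ++I) {
    const CallSite &S = Sites[I];
    if (!S.IsInvoke && S.DoesNotThrow)
      continue; // mirrors the doesNotThrow() check in the hunk
    int State = S.IsInvoke ? S.UnwindState : -1;
    std::printf("site %zu -> state %d\n", I, State);
  }
  return 0;
}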
diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 2e44ac9..aaf267a 100644
--- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -224,7 +224,7 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val > 11)
return MCDisassembler::Fail;
- static unsigned Values[] = {
+ static const unsigned Values[] = {
32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32
};
Inst.addOperand(MCOperand::createImm(Values[Val]));
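The constification above touches a small decode table; the same lookup can be sketched as a freestanding function (a sketch only, reusing the table from the hunk; the Fail/Success codes stand in for MCDisassembler's return values):

#include <cstdio>

enum DecodeStatus { Fail = 0, Success = 1 };

// Same table as DecodeBitpOperand: encodings 0..11 map to bit-position
// immediates, with encoding 0 meaning "bits per word" (32 on XCore).
static DecodeStatus decodeBitp(unsigned Val, unsigned &Imm) {
  if (Val > 11)
    return Fail;
  static const unsigned Values[] = {
      32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32};
  Imm = Values[Val];
  return Success;
}

int main() {
  for (unsigned V = 0; V <= 12; ++V) {
    unsigned Imm = 0;
    if (decodeBitp(V, Imm) == Success)
      std::printf("%2u -> imm %u\n", V, Imm);
    else
      std::printf("%2u -> fail\n", V);
  }
  return 0;
}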
diff --git a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 6fd2dec..dc513f7 100644
--- a/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/contrib/llvm/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -19,8 +19,6 @@
namespace llvm {
-class TargetMachine;
-
class XCoreInstPrinter : public MCInstPrinter {
public:
XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 702056d..b00cdd5 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -115,14 +115,14 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
EmitSpecialLLVMGlobal(GV))
return;
- const DataLayout *TD = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
OutStreamer->SwitchSection(
getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
- unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
-
+ unsigned Align = (unsigned)DL.getPreferredTypeAlignmentShift(C->getType());
+
// Mark the start of the global
getTargetStreamer().emitCCTopData(GVSym->getName());
@@ -154,15 +154,15 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GV->isThreadLocal()) {
report_fatal_error("TLS is not supported by this target!");
}
- unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Size = DL.getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym),
MCConstantExpr::create(Size, OutContext));
}
OutStreamer->EmitLabel(GVSym);
-
- EmitGlobalConstant(C);
+
+ EmitGlobalConstant(DL, C);
// The ABI requires that unsigned scalar types smaller than 32 bits
// are padded to 32 bits.
if (Size < 4)
@@ -208,7 +208,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
- const DataLayout *DL = TM.getDataLayout();
+ const DataLayout &DL = getDataLayout();
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
@@ -224,8 +224,8 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
getSymbol(MO.getGlobal())->print(O, MAI);
break;
case MachineOperand::MO_ConstantPoolIndex:
- O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
- << '_' << MO.getIndex();
+ O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+ << MO.getIndex();
break;
case MachineOperand::MO_BlockAddress:
GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI);
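The printOperand hunk builds constant-pool labels as <private prefix>CPI<function number>_<index>, now taking the prefix from the AsmPrinter's DataLayout reference rather than a TargetMachine pointer. A standalone sketch of that label scheme (the ".L" prefix and the numbers below are illustrative, not taken from the XCore backend):

#include <iostream>
#include <string>

// Illustrative label builder mirroring the "<prefix>CPI<funcNo>_<index>"
// scheme used for constant-pool references in the hunk above.
static std::string cpiLabel(const std::string &PrivatePrefix,
                            unsigned FunctionNumber, unsigned Index) {
  return PrivatePrefix + "CPI" + std::to_string(FunctionNumber) + "_" +
         std::to_string(Index);
}

int main() {
  std::cout << cpiLabel(".L", 3, 7) << "\n"; // prints ".LCPI3_7"
  return 0;
}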
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 76c3d81..ae493de 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -160,27 +160,26 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
/// As offsets are negative, the largest offsets will be first.
static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
MachineFrameInfo *MFI, XCoreFunctionInfo *XFI,
+ const Constant *PersonalityFn,
const TargetLowering *TL) {
assert(XFI->hasEHSpillSlot() && "There are no EH register spill slots");
- const int* EHSlot = XFI->getEHSpillSlot();
- SpillList.push_back(StackSlotInfo(EHSlot[0],
- MFI->getObjectOffset(EHSlot[0]),
- TL->getExceptionPointerRegister()));
- SpillList.push_back(StackSlotInfo(EHSlot[0],
- MFI->getObjectOffset(EHSlot[1]),
- TL->getExceptionSelectorRegister()));
+ const int *EHSlot = XFI->getEHSpillSlot();
+ SpillList.push_back(
+ StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[0]),
+ TL->getExceptionPointerRegister(PersonalityFn)));
+ SpillList.push_back(
+ StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[1]),
+ TL->getExceptionSelectorRegister(PersonalityFn)));
std::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
}
-
static MachineMemOperand *
getFrameIndexMMO(MachineBasicBlock &MBB, int FrameIndex, unsigned flags) {
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
- flags, MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex), flags,
+ MFI.getObjectSize(FrameIndex), MFI.getObjectAlignment(FrameIndex));
return MMO;
}
@@ -323,8 +322,11 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
if (XFI->hasEHSpillSlot()) {
// The unwinder requires stack slot & CFI offsets for the exception info.
// We do not save/spill these registers.
- SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI,
+ const Function *Fn = MF.getFunction();
+ const Constant *PersonalityFn =
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr;
+ SmallVector<StackSlotInfo, 2> SpillList;
+ GetEHSpillList(SpillList, MFI, XFI, PersonalityFn,
MF.getSubtarget().getTargetLowering());
assert(SpillList.size()==2 && "Unexpected SpillList size");
EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
@@ -355,8 +357,12 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
if (RetOpcode == XCore::EH_RETURN) {
// 'Restore' the exception info the unwinder has placed into the stack
// slots.
- SmallVector<StackSlotInfo,2> SpillList;
- GetEHSpillList(SpillList, MFI, XFI, MF.getSubtarget().getTargetLowering());
+ const Function *Fn = MF.getFunction();
+ const Constant *PersonalityFn =
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr;
+ SmallVector<StackSlotInfo, 2> SpillList;
+ GetEHSpillList(SpillList, MFI, XFI, PersonalityFn,
+ MF.getSubtarget().getTargetLowering());
RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);
// Return to the landing pad.
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 9d4a966..9f61c84 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -151,8 +151,9 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
MVT::Other, CPIdx,
CurDAG->getEntryNode());
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = MF->getMachineMemOperand(
- MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4);
+ MemOp[0] =
+ MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+ MachineMemOperand::MOLoad, 4, 4);
cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1);
return node;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index d62e742..105b2cf 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -79,9 +79,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget.getRegisterInfo());
- // Division is expensive
- setIntDivIsCheap(false);
-
setStackPointerRegisterToSaveRestore(XCore::SP);
setSchedulingPreference(Sched::Source);
@@ -154,8 +151,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM,
// Exception handling
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
- setExceptionPointerRegister(XCore::R0);
- setExceptionSelectorRegister(XCore::R1);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
// Atomic operations
@@ -839,7 +834,7 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
return DAG.getLoad(
getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN,
- MachinePointerInfo::getFixedStack(FI), false, false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -1367,8 +1362,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
ArgIn = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+ MachinePointerInfo::getFixedStack(MF, FI), false,
+ false, false, 0);
}
const ArgDataPair ADP = { ArgIn, Ins[i].Flags };
ArgData.push_back(ADP);
@@ -1517,9 +1512,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Create a SelectionDAG node corresponding to a store
// to this memory location.
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN,
- MachinePointerInfo::getFixedStack(FI), false, false,
- 0));
+ MemOpChains.push_back(DAG.getStore(
+ Chain, dl, OutVals[i], FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
+ false, 0));
}
// Transform all store nodes into one single node because
@@ -1567,8 +1563,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// to set, the condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It;
+ MachineFunction::iterator It = ++BB->getIterator();
// thisMBB:
// ...
@@ -1828,9 +1823,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Chain = ST->getChain();
unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits();
- if (StoreBits % 8) {
- break;
- }
+ assert((StoreBits % 8) == 0 &&
+ "Store size in bits must be a multiple of 8");
unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(
ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext()));
unsigned Alignment = ST->getAlignment();
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
index ddd675c..b6f09ff 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.h
@@ -125,6 +125,20 @@ namespace llvm {
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ unsigned
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+ return XCore::R0;
+ }
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ unsigned
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+ return XCore::R1;
+ }
+
private:
const TargetMachine &TM;
const XCoreSubtarget &Subtarget;
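The header hunk above replaces the old setExceptionPointerRegister/setExceptionSelectorRegister calls (removed from XCoreISelLowering.cpp earlier in this diff) with virtual hooks that receive the personality function. A sketch of that override pattern in plain C++ (the class and register numbers are placeholders, not LLVM's TargetLowering interface):

#include <cstdio>

struct Constant; // opaque stand-in for llvm::Constant

// Placeholder base class modelling the hook-based interface: targets that
// use a fixed register pair simply ignore the personality function.
struct LoweringBase {
  virtual ~LoweringBase() = default;
  virtual unsigned getExceptionPointerRegister(const Constant *) const = 0;
  virtual unsigned getExceptionSelectorRegister(const Constant *) const = 0;
};

struct MyTargetLowering : LoweringBase {
  // Fixed registers, analogous to XCore returning R0/R1 regardless of the
  // personality routine in use.
  unsigned getExceptionPointerRegister(const Constant *) const override {
    return 0; // "R0"
  }
  unsigned getExceptionSelectorRegister(const Constant *) const override {
    return 1; // "R1"
  }
};

int main() {
  MyTargetLowering TL;
  std::printf("ptr reg %u, sel reg %u\n",
              TL.getExceptionPointerRegister(nullptr),
              TL.getExceptionSelectorRegister(nullptr));
  return 0;
}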
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index ee30344..e4129ae 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -368,11 +368,10 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex),
+ MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
BuildMI(MBB, I, DL, get(XCore::STWFI))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FrameIndex)
@@ -391,11 +390,10 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
- MachineMemOperand *MMO =
- MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF, FrameIndex),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
+ MFI.getObjectAlignment(FrameIndex));
BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg)
.addFrameIndex(FrameIndex)
.addImm(0)
diff --git a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 996c6f5..f0b7201 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -228,12 +228,9 @@ bool XCoreLowerThreadLocal::runOnModule(Module &M) {
// Find thread local globals.
bool MadeChange = false;
SmallVector<GlobalVariable *, 16> ThreadLocalGlobals;
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ++GVI) {
- GlobalVariable *GV = GVI;
- if (GV->isThreadLocal())
- ThreadLocalGlobals.push_back(GV);
- }
+ for (GlobalVariable &GV : M.globals())
+ if (GV.isThreadLocal())
+ ThreadLocalGlobals.push_back(&GV);
for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I) {
MadeChange |= lowerGlobal(ThreadLocalGlobals[I]);
}
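The XCoreLowerThreadLocal hunk swaps an explicit global_iterator loop for a range-based for that collects the matching globals before rewriting them. The same filtering pattern in standalone C++ (the Global struct is a stand-in for llvm::GlobalVariable):

#include <cstdio>
#include <vector>

// Stand-in for llvm::GlobalVariable with just the flag being filtered on.
struct Global {
  const char *Name;
  bool ThreadLocal;
};

int main() {
  std::vector<Global> Module = {
      {"a", false}, {"tls_counter", true}, {"b", false}, {"tls_flag", true}};

  // Collect pointers first, then rewrite, so iteration is not disturbed by
  // later modifications -- the same reason the pass gathers globals into
  // ThreadLocalGlobals before lowering them.
  std::vector<Global *> ThreadLocals;
  for (Global &G : Module)
    if (G.ThreadLocal)
      ThreadLocals.push_back(&G);

  for (Global *G : ThreadLocals)
    std::printf("would lower %s\n", G->Name);
  return 0;
}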
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index 9ef9752..6c77096 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreMachineFuctionInfo.cpp - XCore machine function info ---------===//
+//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 078ffde..cdcc52f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===-- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===//
+//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index f420081..4a79dac 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -85,7 +85,7 @@ extern "C" void LLVMInitializeXCoreTarget() {
}
TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](Function &F) {
+ return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(XCoreTTIImpl(this, F));
});
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index b5a9905..aa16ecc 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -123,18 +123,21 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
if (Kind.isMergeableConst16()) return MergeableConst16Section;
}
Type *ObjType = GV->getType()->getPointerElementType();
+ auto &DL = GV->getParent()->getDataLayout();
if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() ||
- TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) {
+ DL.getTypeAllocSize(ObjType) < CodeModelLargeSize) {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection
: DataRelROSection;
if (Kind.isBSS() || Kind.isCommon())return BSSSection;
- if (Kind.isDataRel()) return DataSection;
+ if (Kind.isData())
+ return DataSection;
if (Kind.isReadOnlyWithRel()) return DataRelROSection;
} else {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySectionLarge
: DataRelROSectionLarge;
if (Kind.isBSS() || Kind.isCommon())return BSSSectionLarge;
- if (Kind.isDataRel()) return DataSectionLarge;
+ if (Kind.isData())
+ return DataSectionLarge;
if (Kind.isReadOnlyWithRel()) return DataRelROSectionLarge;
}
@@ -142,9 +145,8 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
report_fatal_error("Target does not support TLS or Common sections");
}
-MCSection *
-XCoreTargetObjectFile::getSectionForConstant(SectionKind Kind,
- const Constant *C) const {
+MCSection *XCoreTargetObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C) const {
if (Kind.isMergeableConst4()) return MergeableConst4Section;
if (Kind.isMergeableConst8()) return MergeableConst8Section;
if (Kind.isMergeableConst16()) return MergeableConst16Section;
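SelectSectionForGlobal above keeps the small/large section split but now reads the allocation size from the module's DataLayout. A sketch of that threshold decision (CodeModelLargeSize is 256 per the header diff that follows; the section names here are illustrative strings, not necessarily the backend's):

#include <cstdio>

static const unsigned CodeModelLargeSize = 256; // matches the XCore header

// Illustrative version of the size-based split: small-code-model objects and
// anything under the threshold use the normal data section, everything else
// goes to a "large" variant that needs longer addressing sequences.
static const char *pickDataSection(bool SmallCodeModel, bool Sized,
                                   unsigned AllocSize) {
  if (SmallCodeModel || !Sized || AllocSize < CodeModelLargeSize)
    return ".dp.data";
  return ".dp.data.large";
}

int main() {
  std::printf("%s\n", pickDataSection(false, true, 64));   // .dp.data
  std::printf("%s\n", pickDataSection(false, true, 4096)); // .dp.data.large
  return 0;
}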
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
index 2a5ac23..6701c66 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -33,7 +33,7 @@ static const unsigned CodeModelLargeSize = 256;
Mangler &Mang,
const TargetMachine &TM) const override;
- MCSection *getSectionForConstant(SectionKind Kind,
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
const Constant *C) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
index e23aef3..b2cb889 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -37,7 +37,7 @@ class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> {
const XCoreTargetLowering *getTLI() const { return TLI; }
public:
- explicit XCoreTTIImpl(const XCoreTargetMachine *TM, Function &F)
+ explicit XCoreTTIImpl(const XCoreTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}